diff --git a/.buildkite/test-e2e.yml b/.buildkite/test-e2e.yml new file mode 100644 index 00000000000..e5ce56cb18c --- /dev/null +++ b/.buildkite/test-e2e.yml @@ -0,0 +1,47 @@ +- label: 'Test E2E (nightly operator)' + instance_size: large + image: golang:1.22 + commands: + - source .buildkite/setup-env.sh + - kind create cluster --wait 900s --config ./tests/framework/config/kind-config-buildkite.yml + - kubectl config set clusters.kind-kind.server https://docker:6443 + # Build nightly KubeRay operator image + - pushd ray-operator + - IMG=kuberay/operator:nightly make docker-image + - kind load docker-image kuberay/operator:nightly + - IMG=kuberay/operator:nightly make deploy + - kubectl wait --timeout=90s --for=condition=Available=true deployment kuberay-operator + # Run e2e tests and print KubeRay operator logs if tests fail + - KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m go test -timeout 30m -v ./test/e2e || (kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay && exit 1) + +- label: 'Test E2E rayservice (nightly operator)' + instance_size: large + image: golang:1.22 + commands: + - source .buildkite/setup-env.sh + - kind create cluster --wait 900s --config ./tests/framework/config/kind-config-buildkite.yml + - kubectl config set clusters.kind-kind.server https://docker:6443 + # Build nightly KubeRay operator image + - pushd ray-operator + - IMG=kuberay/operator:nightly make docker-image + - kind load docker-image kuberay/operator:nightly + - IMG=kuberay/operator:nightly make deploy + - kubectl wait --timeout=90s --for=condition=Available=true deployment kuberay-operator + # Run e2e tests and print KubeRay operator logs if tests fail + - KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m go test -timeout 30m -v ./test/e2erayservice || (kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay && exit 1) + +- label: 'Test Autoscaler E2E (nightly operator)' + instance_size: 
large + image: golang:1.22 + commands: + - source .buildkite/setup-env.sh + - kind create cluster --wait 900s --config ./tests/framework/config/kind-config-buildkite.yml + - kubectl config set clusters.kind-kind.server https://docker:6443 + # Build nightly KubeRay operator image + - pushd ray-operator + - IMG=kuberay/operator:nightly make docker-image + - kind load docker-image kuberay/operator:nightly + - IMG=kuberay/operator:nightly make deploy + - kubectl wait --timeout=90s --for=condition=Available=true deployment kuberay-operator + # Run e2e tests and print KubeRay operator logs if tests fail + - KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m go test -timeout 30m -v ./test/e2eautoscaler || (kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay && exit 1) diff --git a/.buildkite/test-sample-yamls.yml b/.buildkite/test-sample-yamls.yml index 764382070cf..eb5bbf6cbe2 100644 --- a/.buildkite/test-sample-yamls.yml +++ b/.buildkite/test-sample-yamls.yml @@ -1,61 +1,33 @@ -#ci:group=:yaml: Sample YAML tests - -- label: 'Test RayCluster Sample YAMLs (nightly operator)' - instance_size: large - image: golang:1.20 - commands: - - ./.buildkite/setup-env.sh - # Build nightly KubeRay operator image - - pushd ray-operator - - IMG=kuberay/operator:nightly make docker-image - - popd - # Use nightly KubeRay operator image - - source .venv/bin/activate && BUILDKITE_ENV=true OPERATOR_IMAGE=kuberay/operator:nightly python3 tests/test_sample_raycluster_yamls.py - -- label: 'Test RayCluster Sample YAMLs (latest release)' - instance_size: large - image: golang:1.20 - commands: - - ./.buildkite/setup-env.sh - # Use KubeRay operator image from the latest release - - source .venv/bin/activate && BUILDKITE_ENV=true OPERATOR_IMAGE=quay.io/kuberay/operator:v1.1.0 python3 tests/test_sample_raycluster_yamls.py - -- label: 'Test RayJob Sample YAMLs (nightly operator)' +- label: 'Test Sample YAMLs (nightly operator)' instance_size: large - image: 
golang:1.20 + image: golang:1.22 commands: - - ./.buildkite/setup-env.sh + - source .buildkite/setup-env.sh + - kind create cluster --wait 900s --config ./tests/framework/config/kind-config-buildkite.yml + - kubectl config set clusters.kind-kind.server https://docker:6443 # Build nightly KubeRay operator image - pushd ray-operator - IMG=kuberay/operator:nightly make docker-image - - popd - # Use nightly KubeRay operator image - - source .venv/bin/activate && BUILDKITE_ENV=true OPERATOR_IMAGE=kuberay/operator:nightly python3 tests/test_sample_rayjob_yamls.py + - kind load docker-image kuberay/operator:nightly + - IMG=kuberay/operator:nightly make deploy + - kubectl wait --timeout=90s --for=condition=Available=true deployment kuberay-operator + # Run sample YAML tests + - KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m go test -timeout 30m -v ./test/sampleyaml + # Printing KubeRay operator logs + - kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay -- label: 'Test RayJob Sample YAMLs (latest release)' +- label: 'Test Sample YAMLs (latest release)' instance_size: large - image: golang:1.20 + image: golang:1.22 commands: - - ./.buildkite/setup-env.sh - # Use KubeRay operator image from the latest release - - source .venv/bin/activate && BUILDKITE_ENV=true OPERATOR_IMAGE=quay.io/kuberay/operator:v1.1.0 python3 tests/test_sample_rayjob_yamls.py - -- label: 'Test RayService Sample YAMLs (nightly operator)' - instance_size: large - image: golang:1.20 - commands: - - ./.buildkite/setup-env.sh - # Build nightly KubeRay operator image + - source .buildkite/setup-env.sh + - kind create cluster --wait 900s --config ./tests/framework/config/kind-config-buildkite.yml + - kubectl config set clusters.kind-kind.server https://docker:6443 + # Deploy KubeRay operator - pushd ray-operator - - IMG=kuberay/operator:nightly make docker-image - - popd - # Use nightly KubeRay operator image - - source .venv/bin/activate && BUILDKITE_ENV=true 
OPERATOR_IMAGE=kuberay/operator:nightly python3 tests/test_sample_rayservice_yamls.py - -- label: 'Test RayService Sample YAMLs (latest release)' - instance_size: large - image: golang:1.20 - commands: - - ./.buildkite/setup-env.sh - # Use KubeRay operator image from the latest release - - source .venv/bin/activate && BUILDKITE_ENV=true OPERATOR_IMAGE=quay.io/kuberay/operator:v1.1.0 python3 tests/test_sample_rayservice_yamls.py + - IMG=quay.io/kuberay/operator:v1.2.2 make deploy + - kubectl wait --timeout=90s --for=condition=Available=true deployment kuberay-operator + # Run sample YAML tests + - KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m go test -timeout 30m -v ./test/sampleyaml + # Printing KubeRay operator logs + - kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 4d284e8b1cf..17d75fb485e 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -7,7 +7,7 @@ body: attributes: value: | Thank you for reporting the problem! - Please make sure what you are reporting is a bug with reproducible steps. + Please make sure what you are reporting is a bug with reproducible steps. - type: checkboxes attributes: diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 616d73cb796..6b05cbae8d9 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -12,7 +12,7 @@ ## Checks -- [ ] I've made sure the tests are passing. +- [ ] I've made sure the tests are passing. 
- Testing Strategy - [ ] Unit tests - [ ] Manual tests diff --git a/.github/workflows/actions/compatibility/action.yaml b/.github/workflows/actions/compatibility/action.yaml index 2353c8c07e7..4e869fa7573 100644 --- a/.github/workflows/actions/compatibility/action.yaml +++ b/.github/workflows/actions/compatibility/action.yaml @@ -37,7 +37,7 @@ runs: uses: docker-practice/actions-setup-docker@master - name: Download Artifact Operator - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: operator_img path: /tmp diff --git a/.github/workflows/consistency-check.yaml b/.github/workflows/consistency-check.yaml index c0146c08afc..012957b1db0 100644 --- a/.github/workflows/consistency-check.yaml +++ b/.github/workflows/consistency-check.yaml @@ -10,7 +10,7 @@ on: jobs: # Check consistency between types.go and generated API. ray-operator-verify-codegen: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 10 steps: - uses: actions/checkout@v2 @@ -21,7 +21,7 @@ jobs: uses: actions/setup-go@v3 with: # Use the same go version with build job - go-version: v1.20 + go-version: v1.22 - name: Check golang version working-directory: ./ray-operator @@ -37,7 +37,7 @@ jobs: # Check consistency between types.go and generated API reference. ray-operator-verify-api-docs: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 10 steps: - uses: actions/checkout@v2 @@ -48,7 +48,7 @@ jobs: uses: actions/setup-go@v3 with: # Use the same go version with build job - go-version: v1.20 + go-version: v1.22 - name: Check golang version working-directory: ./ray-operator @@ -61,7 +61,7 @@ jobs: # 1. Check consistency between types.go and CRD YAML files. # 2. Check consistency between kubebuilder markers and RBAC. 
ray-operator-verify-crd-rbac: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 10 steps: - uses: actions/checkout@v2 @@ -72,7 +72,7 @@ jobs: uses: actions/setup-go@v3 with: # Use the same go version with build job - go-version: v1.20 + go-version: v1.22 - name: Update CRD/RBAC YAML files working-directory: ./ray-operator @@ -96,7 +96,7 @@ jobs: # and in helm-chart/kuberay-operator/crds helm-chart-verify-crd: needs: ray-operator-verify-crd-rbac - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 10 steps: - uses: actions/checkout@v2 @@ -125,7 +125,7 @@ jobs: # and in helm-chart/kuberay-operator/templates helm-chart-verify-rbac: needs: ray-operator-verify-crd-rbac - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 10 steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml deleted file mode 100644 index 84f151cf694..00000000000 --- a/.github/workflows/e2e-tests.yaml +++ /dev/null @@ -1,94 +0,0 @@ -name: e2e - -on: - pull_request: - branches: - - master - - 'release-*' - paths-ignore: - - 'apiserver/**' - - 'cli/**' - - 'docs/**' - - '**.adoc' - - '**.md' - - 'LICENSE' - push: - branches: - - master - - 'release-*' - paths-ignore: - - 'apiserver/**' - - 'cli/**' - - 'docs/**' - - '**.adoc' - - '**.md' - - 'LICENSE' - -concurrency: - group: ${{ github.head_ref }}-${{ github.workflow }} - cancel-in-progress: true - -jobs: - ray-operator: - - runs-on: ubuntu-20.04 - - steps: - - name: Checkout code - uses: actions/checkout@v3 - with: - submodules: recursive - - - name: Set up Go - uses: actions/setup-go@v3 - with: - go-version: v1.20 - - - name: Set up gotestfmt - uses: gotesttools/gotestfmt-action@v2 - with: - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup and start KinD cluster - uses: ./.github/workflows/actions/kind - - - name: Deploy Kuberay operator - id: deploy - run: | - echo Deploying Kuberay operator - cd ray-operator - - 
IMG="${REGISTRY_ADDRESS}"/kuberay - make docker-build -e IMG="${IMG}" -e ENGINE=podman - make docker-push -e IMG="${IMG}" -e ENGINE=podman - - make deploy -e IMG="${IMG}" - kubectl wait --timeout=90s --for=condition=Available=true deployment -n ray-system kuberay-operator - - - name: Run e2e tests - run: | - export KUBERAY_TEST_TIMEOUT_SHORT=1m - export KUBERAY_TEST_TIMEOUT_MEDIUM=5m - export KUBERAY_TEST_TIMEOUT_LONG=10m - - export KUBERAY_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }} - echo "KUBERAY_TEST_OUTPUT_DIR=${KUBERAY_TEST_OUTPUT_DIR}" >> $GITHUB_ENV - - set -euo pipefail - cd ray-operator - go test -timeout 30m -v ./test/e2e -json 2>&1 | tee ${KUBERAY_TEST_OUTPUT_DIR}/gotest.log | gotestfmt - - - name: Print KubeRay operator logs - if: always() && steps.deploy.outcome == 'success' - run: | - echo "Printing KubeRay operator logs" - kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${KUBERAY_TEST_OUTPUT_DIR}/kuberay-operator.log - - - name: Upload logs - uses: actions/upload-artifact@v3 - if: always() && steps.deploy.outcome == 'success' - with: - name: logs - retention-days: 10 - path: | - ${{ env.KUBERAY_TEST_OUTPUT_DIR }}/**/*.log diff --git a/.github/workflows/helm-lint.yaml b/.github/workflows/helm-lint.yaml index 9f0c4002e41..49137f09ebb 100644 --- a/.github/workflows/helm-lint.yaml +++ b/.github/workflows/helm-lint.yaml @@ -9,7 +9,7 @@ on: jobs: lint-test-helm-3-4: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Checkout Code uses: actions/checkout@v3 @@ -35,7 +35,7 @@ jobs: run: ct lint --all --chart-dirs helm-chart/ --validate-maintainers=false lint-test-helm-3-9: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Checkout Code uses: actions/checkout@v3 diff --git a/.github/workflows/image-release.yaml b/.github/workflows/image-release.yaml index b4b8c72521f..f06f6a2eb8b 100644 --- a/.github/workflows/image-release.yaml +++ b/.github/workflows/image-release.yaml @@ -15,13 +15,13 @@ jobs: env: 
working-directory: ./apiserver name: Release APIServer Docker Image - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Set up Go uses: actions/setup-go@v3 with: - go-version: v1.20 + go-version: v1.22 - name: Check out code into the Go module directory uses: actions/checkout@v2 @@ -75,13 +75,13 @@ jobs: env: working-directory: ./ray-operator name: Release Operator Docker Images - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Set up Go uses: actions/setup-go@v3 with: - go-version: v1.20 + go-version: v1.22 - name: Check out code into the Go module directory uses: actions/checkout@v2 diff --git a/.github/workflows/kubectl-plugin-e2e-tests.yaml b/.github/workflows/kubectl-plugin-e2e-tests.yaml new file mode 100644 index 00000000000..9e5a354857d --- /dev/null +++ b/.github/workflows/kubectl-plugin-e2e-tests.yaml @@ -0,0 +1,99 @@ +name: kubectl plugin e2e test + +on: + pull_request: + branches: + - master + - 'release-*' + paths-ignore: + - 'apiserver/**' + - 'docs/**' + - '**.adoc' + - '**.md' + - 'LICENSE' + push: + branches: + - master + - 'release-*' + paths-ignore: + - 'apiserver/**' + - 'docs/**' + - '**.adoc' + - '**.md' + - 'LICENSE' + +jobs: + build: + runs-on: ubuntu-20.04 + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Set up Go + uses: actions/setup-go@v3 + with: + go-version: v1.22 + + - name: Set up gotestfmt + uses: gotesttools/gotestfmt-action@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup and start KinD cluster + uses: ./.github/workflows/actions/kind + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Ray + run: | + python --version + pip install -U "ray[default]==2.40.0" + + - name: Build CLI and Add to PATH + run: | + cd ./kubectl-plugin + + go mod download + go build -o kubectl-ray -a ./cmd/kubectl-ray.go + + cp ./kubectl-ray /usr/local/bin + + - name: Deploy Kuberay operator + id: deploy 
+ run: | + echo Deploying Kuberay operator + cd ray-operator + + IMG="${REGISTRY_ADDRESS}"/kuberay + make docker-build -e IMG="${IMG}" -e ENGINE=podman + make docker-push -e IMG="${IMG}" -e ENGINE=podman + + make deploy -e IMG="${IMG}" + kubectl wait --timeout=90s --for=condition=Available=true deployment kuberay-operator + + - name: Run e2e tests + run: | + export KUBERAY_TEST_TIMEOUT_SHORT=1m + export KUBERAY_TEST_TIMEOUT_MEDIUM=5m + export KUBERAY_TEST_TIMEOUT_LONG=10m + + export KUBERAY_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }} + echo "KUBERAY_TEST_OUTPUT_DIR=${KUBERAY_TEST_OUTPUT_DIR}" >> $GITHUB_ENV + + set -euo pipefail + cd kubectl-plugin + go test -timeout 60m -v ./test/e2e -json 2>&1 | tee ${KUBERAY_TEST_OUTPUT_DIR}/gotest.log | gotestfmt + + - name: Upload logs + uses: actions/upload-artifact@v4 + if: always() && steps.deploy.outcome == 'success' + with: + name: logs + retention-days: 10 + path: | + ${{ env.KUBERAY_TEST_OUTPUT_DIR }}/**/*.log diff --git a/.github/workflows/kubectl-plugin-release.yaml b/.github/workflows/kubectl-plugin-release.yaml new file mode 100644 index 00000000000..cbd2cc976e0 --- /dev/null +++ b/.github/workflows/kubectl-plugin-release.yaml @@ -0,0 +1,33 @@ +name: release-kubectl-plugin +on: + pull_request: + workflow_dispatch: + inputs: + tag: + description: 'Desired release version tag (e.g. v1.1.0-rc.1).' 
+ required: true +jobs: + release-kubectl-plugin: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.tag }} + - name: Show tag + run: git show HEAD +# - name: Setup Go +# uses: actions/setup-go@v5 +# with: +# go-version: '1.22' +# - name: GoReleaser +# uses: goreleaser/goreleaser-action@v6 +# with: +# distribution: 'goreleaser' +# version: latest +# args: release --clean +# workdir: 'kubectl-plugin' +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# - name: Update new version in krew-index +# uses: rajatjindal/krew-release-bot@v0.0.46 diff --git a/.github/workflows/test-job.yaml b/.github/workflows/test-job.yaml index 9e251b5643e..67c72711e0e 100644 --- a/.github/workflows/test-job.yaml +++ b/.github/workflows/test-job.yaml @@ -9,117 +9,35 @@ on: jobs: lint: - name: Lint - runs-on: ubuntu-latest + name: Lint (pre-commit) + runs-on: ubuntu-22.04 steps: - - name: Set up Go - uses: actions/setup-go@v3 - with: - # Use the same go version with build job - go-version: v1.20 - - - name: Check out code into the Go module directory - uses: actions/checkout@v2 - with: - # When checking out the repository that - # triggered a workflow, this defaults to the reference or SHA for that event. - # Default value should work for both pull_request and merge(push) event. 
- ref: ${{github.event.pull_request.head.sha}} - - - name: Install goimports and gofumpt - run: | - go install golang.org/x/tools/cmd/goimports@latest - go install mvdan.cc/gofumpt@v0.3.1 - - - name: Run gofmt - uses: Jerome1337/gofmt-action@v1.0.4 - with: - gofmt-path: 'apiserver cli ray-operator' - gofmt-flags: '-l -d -s' - - - name: Run linter against ray operator - uses: golangci/golangci-lint-action@v2 - with: - # Optional: version of golangci-lint to use in form of v1.2 or v1.2.3 or `latest` to use the latest version - version: v1.54.1 - - # Optional: working directory, useful for monorepos - working-directory: ./ray-operator - - # Optional: golangci-lint command line arguments. - # args: --issues-exit-code=0 - args: --timeout=3m - - # Optional: show only new issues if it's a pull request. The default value is `false`. - # only-new-issues: true - - # Optional: if set to true then the action will use pre-installed Go. - skip-go-installation: true - - # Optional: if set to true then the action don't cache or restore ~/go/pkg. - skip-pkg-cache: true - - # Optional: if set to true then the action don't cache or restore ~/.cache/go-build. 
- # skip-build-cache: true - - - name: Run linter against apiserver - uses: golangci/golangci-lint-action@v2 - with: - version: v1.54.1 - working-directory: ./apiserver - args: --timeout=3m --exclude='SA1019' - skip-go-installation: true - skip-pkg-cache: true - - - name: Run linter against security proxy - uses: golangci/golangci-lint-action@v2 - with: - version: v1.54.1 - working-directory: ./experimental - args: --timeout=3m - skip-go-installation: true - skip-pkg-cache: true - - - name: Run linter against cli - uses: golangci/golangci-lint-action@v2 - with: - version: v1.54.1 - working-directory: ./cli - args: --timeout=3m - skip-go-installation: true - skip-pkg-cache: true - - - name: Run goimports - run: test -z "$(set -o pipefail && $(go env GOPATH)/bin/goimports -l apiserver/ cli/ $(find ./ray-operator -name "*.go" | grep -v zz_generated.deepcopy.go) | tee goimports.out)" || { cat goimports.out && exit 1; } - - - name: Open this to see how to fix goimports if it fails - run: | - echo "Run command 'goimports -w apiserver/ cli/ $(find ./ray-operator -name "*.go" | grep -v zz_generated.deepcopy.go)' to correct your code format." 
- echo "Proposed format changes:" - $(go env GOPATH)/bin/goimports -d apiserver/ cli/ $(find ./ray-operator -name "*.go" | grep -v zz_generated.deepcopy.go) - if: failure() + - name: Install golangci-lint + run: | + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s v1.60.3 + mv ./bin/golangci-lint /usr/local/bin/golangci-lint + shell: bash - - name: Run gofumpt - run: test -z "$(set -o pipefail && $(go env GOPATH)/bin/gofumpt -l apiserver/ ray-operator/ cli/ | tee gofumpt.out)" || { cat gofumpt.out && exit 1; } + - name: Install kubeconform + run: | + curl -L https://github.com/yannh/kubeconform/releases/download/v0.6.7/kubeconform-linux-amd64.tar.gz -o kubeconform.tar.gz + tar -xzf kubeconform.tar.gz + mv kubeconform /usr/local/bin/ - - name: Open this to see how to fix gofumpt if it fails - run: | - echo "Run command 'gofumpt -w apiserver/ ray-operator/ cli/' to correct your code format." - echo "Proposed format changes:" - $(go env GOPATH)/bin/gofumpt -d apiserver/ ray-operator/ cli/ - if: failure() + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + - uses: pre-commit/action@v3.0.1 build_apiserver: env: working-directory: ./apiserver - cli-working-directory: ./cli - name: Build Apiserver, CLI Binaries and Docker Images - runs-on: ubuntu-latest + name: Build Apiserver and Docker Images + runs-on: ubuntu-22.04 steps: - name: Set up Go uses: actions/setup-go@v3 with: - go-version: v1.20 + go-version: v1.22 - name: Check out code into the Go module directory uses: actions/checkout@v2 @@ -165,7 +83,7 @@ jobs: docker save -o /tmp/apiserver.tar kuberay/apiserver:${{ steps.vars.outputs.sha_short }} - name: Upload Artifact Apiserver - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: apiserver_img path: /tmp/apiserver.tar @@ -186,21 +104,17 @@ jobs: docker push quay.io/kuberay/apiserver:nightly if: contains(fromJson('["refs/heads/master"]'), github.ref) - - name: Build CLI - run: go 
build -o kuberay -a main.go - working-directory: ${{env.cli-working-directory}} - build_security_proxy: env: working-directory: ./experimental name: Build security proxy Binaries and Docker Images - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Set up Go uses: actions/setup-go@v3 with: - go-version: v1.20 + go-version: v1.22 - name: Check out code into the Go module directory uses: actions/checkout@v2 @@ -243,7 +157,7 @@ jobs: working-directory: ${{env.working-directory}} - name: Upload security proxy artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: security-proxy_img path: /tmp/security-proxy.tar @@ -268,13 +182,13 @@ jobs: env: working-directory: ./ray-operator name: Build Operator Binaries and Docker Images - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Set up Go uses: actions/setup-go@v3 with: - go-version: v1.20 + go-version: v1.22 - name: Check out code into the Go module directory uses: actions/checkout@v2 @@ -321,7 +235,7 @@ jobs: working-directory: ${{env.working-directory}} - name: Upload Artifact Operator - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: operator_img path: /tmp/operator.tar @@ -374,12 +288,43 @@ jobs: quay.io/${{env.REPO_ORG}}/${{env.REPO_NAME}}:nightly if: contains(fromJson('["refs/heads/master"]'), github.ref) + build_kubectl-plugin: + env: + working-directory: ./kubectl-plugin + name: Build Ray Kubectl plugin + runs-on: ubuntu-22.04 + steps: + - name: Set up Go + uses: actions/setup-go@v3 + with: + go-version: v1.22 + + - name: Check out code into the Go module directory + uses: actions/checkout@v2 + with: + # When checking out the repository that + # triggered a workflow, this defaults to the reference or SHA for that event. + # Default value should work for both pull_request and merge(push) event. 
+ ref: ${{github.event.pull_request.head.sha}} + + - name: Get dependencies + run: go mod download + working-directory: ${{env.working-directory}} + + - name: Build CLI + run: go build -o kubectl-ray -a ./cmd/kubectl-ray.go + working-directory: ${{env.working-directory}} + + - name: Test + run: go test ./pkg/... -race -parallel 4 + working-directory: ${{env.working-directory}} + test-compatibility-2_7_0: needs: - build_operator - build_apiserver - lint - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 name: Compatibility Test - 2.7.0 steps: - name: Check out code into the Go module directory @@ -399,7 +344,7 @@ jobs: - build_operator - build_apiserver - lint - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 name: Compatibility Test - 2.8.0 steps: - name: Check out code into the Go module directory @@ -419,7 +364,7 @@ jobs: - build_operator - build_apiserver - lint - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 name: Compatibility Test - 2.9.0 steps: - name: Check out code into the Go module directory @@ -439,7 +384,7 @@ jobs: - build_operator - build_apiserver - lint - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 name: Compatibility Test - Nightly steps: - name: Check out code into the Go module directory @@ -455,7 +400,7 @@ jobs: ray_version: nightly python-client-test: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 name: Python Client Test steps: - name: Set up Docker diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 00000000000..6e2ebda1bcf --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,91 @@ +linters-settings: + gofmt: + simplify: true + gosec: + excludes: + - G601 + ginkgolinter: + forbid-focus-container: true + goimports: + local-prefixes: github.com/ray-project/kuberay/ray-operator + misspell: + locale: US + nolintlint: + require-explanation: true + require-specific: true + revive: + ignore-generated-header: true + rules: + - name: blank-imports + - name: context-as-argument + - name: context-keys-type + - name: empty-block + - name: 
error-naming + - name: error-return + - name: error-strings + - name: errorf + - name: exported + disabled: true + - name: if-return + - name: increment-decrement + - name: indent-error-flow + - name: package-comments + - name: range + - name: receiver-naming + - name: redefines-builtin-id + - name: superfluous-else + - name: time-naming + - name: unexported-return + - name: unreachable-code + - name: unused-parameter + - name: var-declaration + - name: var-naming + exclude: + - "**/ray-operator/apis/config/v1alpha1/*.go" + - "**/ray-operator/apis/ray/v1alpha1/*.go" + - "**/ray-operator/apis/ray/v1/*.go" + arguments: + - ["ID", "JSON", "HTTP", "IP"] # AllowList + - [] # DenyList + - - upperCaseConst: true + gocyclo: + min-complexity: 15 + govet: + enable: + - fieldalignment + lll: + line-length: 120 +linters: + enable: + - asciicheck + - errcheck + - errorlint + - ginkgolinter +# - gocyclo + - gofmt + - gofumpt + - goimports + - gosec + - gosimple + - govet + - ineffassign +# - lll + - makezero + - misspell + - nilerr + - noctx + - nolintlint + - predeclared + - revive + - staticcheck + - typecheck + - unconvert + - unparam + - unused + - wastedassign + disable-all: true +issues: + max-issues-per-linter: 0 + max-same-issues: 0 +run: + timeout: 3m diff --git a/.krew.yaml b/.krew.yaml new file mode 100644 index 00000000000..0d2e12f1243 --- /dev/null +++ b/.krew.yaml @@ -0,0 +1,36 @@ +apiVersion: krew.googlecontainertools.github.com/v1alpha2 +kind: Plugin +metadata: + name: ray +spec: + version: {{ .TagName }} + homepage: https://github.com/ray-project/kuberay/tree/master/kubectl-plugin + platforms: + - selector: + matchLabels: + os: darwin + arch: amd64 + {{addURIAndSha "https://github.com/ray-project/kuberay/releases/download/{{ .TagName }}/kubectl-ray_{{ .TagName }}_darwin_amd64.tar.gz" .TagName }} + bin: kubectl-ray + - selector: + matchLabels: + os: darwin + arch: arm64 + {{addURIAndSha "https://github.com/ray-project/kuberay/releases/download/{{ .TagName 
}}/kubectl-ray_{{ .TagName }}_darwin_arm64.tar.gz" .TagName }} + bin: kubectl-ray + - selector: + matchLabels: + os: linux + arch: amd64 + {{addURIAndSha "https://github.com/ray-project/kuberay/releases/download/{{ .TagName }}/kubectl-ray_{{ .TagName }}_linux_amd64.tar.gz" .TagName }} + bin: kubectl-ray + - selector: + matchLabels: + os: linux + arch: arm64 + {{addURIAndSha "https://github.com/ray-project/kuberay/releases/download/{{ .TagName }}/kubectl-ray_{{ .TagName }}_linux_arm64.tar.gz" .TagName }} + bin: kubectl-ray + shortDescription: Ray kubectl plugin + description: | + Kubectl plugin/extension for Kuberay CLI that provides the ability to manage ray resources. + Read more documentation at: https://github.com/ray-project/kuberay/tree/master/kubectl-plugin diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000000..b76c8582f48 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,76 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +exclude: _generated.go$|\.svg$|^third_party/|^proto/swagger/|^apiserver/pkg/swagger/datafile.go$|^docs/reference/api.md$|^config/grafana/ + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + args: [--allow-multiple-documents] + exclude: ^helm-chart/|^mkdocs\.yml$|^benchmark/perf-tests/|^\.krew\.yaml$ + - id: check-added-large-files + - id: check-merge-conflict + - id: check-case-conflict + - id: check-vcs-permalinks + - id: check-json + - id: pretty-format-json + args: [--autofix, --no-sort-keys, --no-ensure-ascii] + - id: mixed-line-ending + args: [--fix=lf] + - id: detect-private-key + + - repo: https://github.com/gitleaks/gitleaks + rev: v8.18.2 + hooks: + - id: gitleaks + + - repo: local + hooks: + - id: check-golangci-lint-version + name: golangci-lint version check + entry: bash -c 'version="1.60.3"; [ 
"$(golangci-lint --version | awk "/version/ {print \$4}")" = "$version" ] || { echo "golangci-lint version is not $version"; exit 1; }' + language: system + always_run: true + fail_fast: true + pass_filenames: false + + - repo: local + hooks: + - id: golangci-lint-ray-operator + name: golangci-lint (ray-operator) + entry: bash -c 'cd ray-operator && golangci-lint run --fix --exclude-files _generated.go --timeout 10m0s; status=$?; cd ..; exit $status' + types: [ go ] + language: golang + require_serial: true + files: ^ray-operator/ + + - repo: local + hooks: + - id: golangci-lint-kubectl-plugin + name: golangci-lint (kubectl-plugin) + entry: bash -c 'cd kubectl-plugin && golangci-lint run --fix --timeout 10m0s; status=$?; cd ..; exit $status' + types: [ go ] + language: golang + require_serial: true + files: ^kubectl-plugin/ + + - repo: local + hooks: + - id: check-kubeconform-version + name: kubeconform version check + entry: bash -c 'version="0.6.7"; [ "$(kubeconform -v | awk -F"v" "{print \$2}")" = "$version" ] || { echo "kubeconform version is not $version"; exit 1; }' + language: system + always_run: true + fail_fast: true + pass_filenames: false + + - repo: local + hooks: + - id: validate-helm-charts + name: validate helm charts with kubeconform + entry: bash scripts/validate-helm.sh + language: system + pass_filenames: false diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f4e425623b0..074997555ec 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Contributing -Thank you for investing your time in contributing to KubeRay project! +Thank you for investing your time in contributing to KubeRay project! Read our [Code of Coduct](./CODE_OF_CONDUCT.md) to keep our community approachable and respectable. @@ -27,7 +27,7 @@ Refer to the template for more information on what goes into a PR description. ### Design Docs -A contributor proposes a design with a PR on the repository to allow for revisions and discussions. 
If a design needs to be discussed before formulating a document for it, make use of Google doc and GitHub issue to involve the community on the discussion. +A contributor proposes a design with a PR on the repository to allow for revisions and discussions. If a design needs to be discussed before formulating a document for it, make use of Google doc and GitHub issue to involve the community on the discussion. ### GitHub Issues @@ -48,7 +48,7 @@ When filing an issue, please check existing open, or recently closed, issues to ### Find interesting issue -If you spot a problem with the problem, [search if an issue already exists](https://github.com/ray-project/kuberay/issues). If a related issue doesn't exist, you can open a new issue using [issue template](https://github.com/ray-project/kuberay/issues/new/choose). +If you spot a problem with the project, [search if an issue already exists](https://github.com/ray-project/kuberay/issues). If a related issue doesn't exist, you can open a new issue using [issue template](https://github.com/ray-project/kuberay/issues/new/choose). ### Solve an issue @@ -56,7 +56,7 @@ KubeRay has subproject and each of them may have different development and testi ### Open a Pull request. -When you're done making the changes, open a pull request and fill PR template so we can better review your PR. The template helps reviewers understand your changes and the purpose of your pull request. +When you're done making the changes, open a pull request and fill PR template so we can better review your PR. The template helps reviewers understand your changes and the purpose of your pull request. Don't forget to link PR to issue if you are solving one. diff --git a/README.md b/README.md index 8dd91cf812f..485622cd935 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,6 @@ by some organizations to back user interfaces for KubeRay resource management.
* **KubeRay Python client**: This Python client library provides APIs to handle RayCluster from your Python application. -* **KubeRay CLI**: KubeRay CLI provides the ability to manage KubeRay resources through command-line interface. - ## Documentation From September 2023, all user-facing KubeRay documentation will be hosted on the [Ray documentation](https://docs.ray.io/en/latest/cluster/kubernetes/index.html). diff --git a/apiserver/CreatingServe.md b/apiserver/CreatingServe.md index 02f9d435fa4..c3888b59e91 100644 --- a/apiserver/CreatingServe.md +++ b/apiserver/CreatingServe.md @@ -88,7 +88,7 @@ kubectl get service that should return the following: ```shell -test-cluster-head-svc ClusterIP 10.96.19.185 8265/TCP,52365/TCP,10001/TCP,8080/TCP,6379/TCP,8000/TCP +test-cluster-head-svc ClusterIP 10.96.19.185 8265/TCP,52365/TCP,10001/TCP,8080/TCP,6379/TCP,8000/TCP test-cluster-serve-svc ClusterIP 10.96.144.162 8000/TCP ``` diff --git a/apiserver/DEVELOPMENT.md b/apiserver/DEVELOPMENT.md index 4e8d3cc63ca..b6f0bee9c20 100644 --- a/apiserver/DEVELOPMENT.md +++ b/apiserver/DEVELOPMENT.md @@ -83,8 +83,8 @@ Examples: # To run end to end test using default cluster make e2e-test -# To run end to end test in fresh cluster. -# Please note that: +# To run end to end test in fresh cluster. +# Please note that: # * the cluster created for this test is the same as the cluster created by make cluster. # * if the end to end tests fail the cluster will still be up and will have to be explicitly shutdown by executing make clean-cluster make local-e2e-test @@ -97,36 +97,7 @@ To update the swagger ui files deployed with the Kuberay API server, you'll need * Manually run the [hack/update-swagger-ui.bash](hack/update-swagger-ui.bash) script. The script downloads the swagger ui release and copies the downloaded files to the [../third_party/swagger-ui](../third_party/swagger-ui/) directory. 
It copies the [swagger-initializer.js](../third_party/swagger-ui/swagger-initializer.js) to [swagger-initializer.js.backup](../third_party/swagger-ui/swagger-initializer.js.backup). -* Update the contents of the [swagger-initializer.js](../third_party/swagger-ui/swagger-initializer.js) to set the URLs for for the individual swagger docs. The content of the file is show below: - - ```javascript - window.onload = function() { - // - - // the following lines will be replaced by docker/configurator, when it runs in a docker-container - window.ui = SwaggerUIBundle({ - spec: location.host, - urls: [{"url":"http://"+location.host+"/swagger/serve.swagger.json","name":"RayServe Service"}, - {"url":"http://"+location.host+"/swagger/error.swagger.json","name":"Errors API"}, - {"url":"http://"+location.host+"/swagger/job.swagger.json","name":"RayJob Service"}, - {"url":"http://"+location.host+"/swagger/config.swagger.json","name":"ComputeTemplate Service"}, - {"url":"http://"+location.host+"/swagger/cluster.swagger.json","name":"Cluster Service"}], - dom_id: '#swagger-ui', - deepLinking: true, - presets: [ - SwaggerUIBundle.presets.apis, - SwaggerUIStandalonePreset - ], - plugins: [ - SwaggerUIBundle.plugins.DownloadUrl - ], - layout: "StandaloneLayout" - }); - - // - }; - ``` - +* Update the contents of the [swagger-initializer.js](../third_party/swagger-ui/swagger-initializer.js) to set the URLs for the individual swagger docs. * Execute `make build-swagger` target to update the contents of the [datafile.go](pkg/swagger/datafile.go) file.
This will package the content of the [swagger-ui](../third_party/swagger-ui/) directory for serving by the api server (see [func serveSwaggerUI(mux *http.ServeMux)](https://github.com/ray-project/kuberay/blob/f1067378bc99987f3eba1e5b12b4cc797465336d/apiserver/cmd/main.go#L149) in [main.go](cmd/main.go)) The swagger ui is available at the following URLs: diff --git a/apiserver/Dockerfile b/apiserver/Dockerfile index 61402443739..ea77043619f 100644 --- a/apiserver/Dockerfile +++ b/apiserver/Dockerfile @@ -1,5 +1,5 @@ # Build the backend service -FROM registry.access.redhat.com/ubi9/go-toolset:1.20.10 as builder +FROM golang:1.22.4-bullseye as builder WORKDIR /workspace # Copy the Go Modules manifests diff --git a/apiserver/HACluster.md b/apiserver/HACluster.md index 4879c8d7622..b07238725cd 100644 --- a/apiserver/HACluster.md +++ b/apiserver/HACluster.md @@ -114,7 +114,7 @@ curl -X POST 'localhost:31888/apis/v1/namespaces/default/clusters' \ "environment": { "values": { "RAY_gcs_rpc_server_reconnect_timeout_s": "300" - } + } }, "volumes": [ { @@ -131,7 +131,7 @@ curl -X POST 'localhost:31888/apis/v1/namespaces/default/clusters' \ } ] } -}' +}' ``` Note that computeTemplate here has to be created using this [command](test/cluster//template/simple) @@ -140,7 +140,7 @@ Lets discuss the important pieces here: You need to specify annotation, that tells Ray that this is cluster with GCS fault tolerance ```sh -ray.io/ft-enabled: "true" +ray.io/ft-enabled: "true" ``` For the `headGroupSpec` you need the following. 
In the `rayStartParams` you need to add information about Redis @@ -177,7 +177,7 @@ environment variable: "environment": { "values": { "RAY_gcs_rpc_server_reconnect_timeout_s": "300" - } + } }, ``` diff --git a/apiserver/JobSubmission.md b/apiserver/JobSubmission.md index def5bc00041..4fc1351f71a 100644 --- a/apiserver/JobSubmission.md +++ b/apiserver/JobSubmission.md @@ -114,7 +114,7 @@ Once the job is submitted, the following command can be used to get job's detail ```shell curl -X GET 'localhost:31888/apis/v1/namespaces/default/jobsubmissions/test-cluster/raysubmit_KWZLwme56esG3Wcr' \ ---header 'Content-Type: application/json' +--header 'Content-Type: application/json' ``` This should return JSON similar to the one below @@ -141,7 +141,7 @@ You can also get job execution log using the following command (Note that submis ```shell curl -X GET 'localhost:31888/apis/v1/namespaces/default/jobsubmissions/test-cluster/log/raysubmit_KWZLwme56esG3Wcr' \ ---header 'Content-Type: application/json' +--header 'Content-Type: application/json' ``` This will return execution log, that will look something like the following @@ -165,7 +165,7 @@ You can also list all the jobs (in any state) in the Ray cluster using the follo ```shell curl -X GET 'localhost:31888/apis/v1/namespaces/default/jobsubmissions/test-cluster' \ ---header 'Content-Type: application/json' +--header 'Content-Type: application/json' ``` This should return the list of the submissions, that looks as follows: @@ -196,7 +196,7 @@ Execution of the job can be stoped using the following command (Note that submis ```shell curl -X POST 'localhost:31888/apis/v1/namespaces/default/jobsubmissions/test-cluster/raysubmit_KWZLwme56esG3Wcr' \ ---header 'Content-Type: application/json' +--header 'Content-Type: application/json' ``` ### Delete Job @@ -205,7 +205,7 @@ Finally, you can delete job using the following command (Note that submission id ```shell curl -X DELETE 
'localhost:31888/apis/v1/namespaces/default/jobsubmissions/test-cluster/raysubmit_KWZLwme56esG3Wcr' \ ---header 'Content-Type: application/json' +--header 'Content-Type: application/json' ``` You can validate job deletion by looking at the Ray dashboard (jobs pane) and ensuring that it was removed diff --git a/apiserver/Makefile b/apiserver/Makefile index d9c8e451b12..79a52ec6f1f 100644 --- a/apiserver/Makefile +++ b/apiserver/Makefile @@ -4,19 +4,19 @@ REPO_ROOT := $(shell dirname ${PWD}) REPO_ROOT_BIN := $(REPO_ROOT)/bin # Image URL to use all building/pushing image targets -IMG_REPO ?= kuberay/apiserver +IMG_REPO ?= quay.io/kuberay/apiserver IMG_TAG ?=latest IMG ?= $(IMG_REPO):$(IMG_TAG) # Allow for additional test flags (-v, etc) -GO_TEST_FLAGS ?= +GO_TEST_FLAGS ?= # Ray docker images to use for end to end tests based upon the architecture # for arm64 environments (Apple silicon included) pull the architecture specific image ifeq (arm64,$(shell go env GOARCH)) E2E_API_SERVER_RAY_IMAGE ?=rayproject/ray:2.9.0-py310-aarch64 -else +else E2E_API_SERVER_RAY_IMAGE ?=rayproject/ray:2.9.0-py310 -endif +endif # Kuberay API Server base URL to use in end to end tests E2E_API_SERVER_URL ?=http://localhost:31888 @@ -70,12 +70,12 @@ fumpt: gofumpt ## Run gofmtumpt against code. .PHONY: imports imports: goimports ## Run goimports against code. - $(GOIMPORTS) -l -w . + $(GOIMPORTS) -l -w . .PHONY: lint lint: golangci-lint fmt vet fumpt imports ## Run the linter. # exclude the SA1019 check which checks the usage of deprecated fields. - $(GOLANGCI_LINT) run --timeout=3m --exclude='SA1019' + $(GOLANGCI_LINT) run --timeout=3m --exclude='SA1019' --no-config build: fmt vet fumpt imports lint ## Build api server binary. go build -o ${REPO_ROOT_BIN}/kuberay-apiserver cmd/main.go @@ -96,7 +96,7 @@ test: fmt vet fumpt imports lint ## Run all unit tests. .PHONY: e2e-test e2e-test: ## Run end to end tests using a pre-exiting cluster. go test ./test/e2e/... 
$(GO_TEST_FLAGS) -timeout 60m -race -count=1 -parallel 4 - + .PHONY: local-e2e-test ## Run end to end tests on newly created cluster. local-e2e-test: operator-image cluster load-operator-image deploy-operator install load-ray-test-image e2e-test clean-cluster ## Run end to end tests, create a fresh kind cluster will all components deployed. @@ -119,15 +119,15 @@ load-image: ## Load the api server image to the kind cluster created with create .PHONY: operator-image operator-image: ## Build the operator image to be loaded in your kind cluster. - cd ../ray-operator && $(MAKE) docker-image -e IMG=kuberay/operator:$(OPERATOR_IMAGE_TAG) + cd ../ray-operator && $(MAKE) docker-image -e IMG=quay.io/kuberay/operator:$(OPERATOR_IMAGE_TAG) .PHONY: security-proxy-image security-proxy-image: ## Build the security proxy image to be loaded in your kind cluster. - cd ../experimental && $(MAKE) docker-image -e IMG=kuberay/security-proxy:$(SECURITY_IMAGE_TAG) + cd ../experimental && $(MAKE) docker-image -e IMG=quay.io/kuberay/security-proxy:$(SECURITY_IMAGE_TAG) .PHONY: deploy-operator -deploy-operator: ## Deploy operator via helm into the K8s cluster specified in ~/.kube/config. -# Note that you should make your operatorimage available by either pushing it to an image registry, such as DockerHub or Quay, or by loading the image into the Kubernetes cluster. +deploy-operator: ## Deploy operator via helm into the K8s cluster specified in ~/.kube/config. +# Note that you should make your operatorimage available by either pushing it to an image registry, such as DockerHub or Quay, or by loading the image into the Kubernetes cluster. # If you are using a Kind cluster for development, you can run `make load-image` to load the newly built image into the Kind cluster. 
helm upgrade --install raycluster ../helm-chart/kuberay-operator --wait \ --set image.tag=${OPERATOR_IMAGE_TAG} --set image.pullPolicy=IfNotPresent @@ -139,17 +139,17 @@ undeploy-operator: ## Undeploy operator via helm from the K8s cluster specified .PHONY: load-operator-image load-operator-image: ## Load the operator image to the kind cluster created with make cluster. ifneq (${OPERATOR_IMAGE_TAG}, latest) - $(ENGINE) pull kuberay/operator:$(OPERATOR_IMAGE_TAG) -endif - $(KIND) load docker-image kuberay/operator:$(OPERATOR_IMAGE_TAG) -n $(KIND_CLUSTER_NAME) + $(ENGINE) pull quay.io/kuberay/operator:$(OPERATOR_IMAGE_TAG) +endif + $(KIND) load docker-image quay.io/kuberay/operator:$(OPERATOR_IMAGE_TAG) -n $(KIND_CLUSTER_NAME) .PHONY: load-security-proxy-image load-security-proxy-image: ## Load the security proxy image to the kind cluster created with make cluster. ifneq (${SECURITY_IMAGE_TAG}, latest) $(ENGINE) pull kuberay/security-proxy:$(SECURITY_IMAGE_TAG) -endif +endif $(KIND) load docker-image kuberay/security-proxy:$(SECURITY_IMAGE_TAG) -n $(KIND_CLUSTER_NAME) - + .PHONY: load-ray-test-image load-ray-test-image: ## Load the ray test images $(ENGINE) pull $(E2E_API_SERVER_RAY_IMAGE) @@ -184,8 +184,8 @@ uninstall-secure: ## Remove the kuberay api server with security server from the $(KUSTOMIZE) build deploy/local/secure | kubectl delete -f - .PHONY: deploy -deploy: ## Deploy via helm the kuberay api server to the K8s cluster specified in ~/.kube/config. -# Note that you should make your KubeRay APIServer image available by either pushing it to an image registry, such as DockerHub or Quay, or by loading the image into the Kubernetes cluster. +deploy: ## Deploy via helm the kuberay api server to the K8s cluster specified in ~/.kube/config. +# Note that you should make your KubeRay APIServer image available by either pushing it to an image registry, such as DockerHub or Quay, or by loading the image into the Kubernetes cluster. 
# If you are using a Kind cluster for development, you can run `make load-image` to load the newly built image into the Kind cluster. helm upgrade --install kuberay-apiserver ../helm-chart/kuberay-apiserver --wait \ --set image.repository=${IMG_REPO},image.tag=${IMG_TAG} --set image.pullPolicy=IfNotPresent @@ -210,7 +210,7 @@ GOBINDATA ?= $(REPO_ROOT_BIN)/go-bindata ## Tool Versions -KUSTOMIZE_VERSION ?= v3.8.7 +KUSTOMIZE_VERSION ?= v5.4.3 GOFUMPT_VERSION ?= v0.3.1 GOIMPORTS_VERSION ?= v0.14.0 GOLANGCI_LINT_VERSION ?= v1.54.1 @@ -227,10 +227,10 @@ $(KUSTOMIZE): $(REPO_ROOT_BIN) goimports: $(GOIMPORTS) ## Download goimports locally if necessary $(GOIMPORTS): $(REPO_ROOT_BIN) test -s $(GOIMPORTS) || GOBIN=$(REPO_ROOT_BIN) go install golang.org/x/tools/cmd/goimports@$(GOIMPORTS_VERSION) - + .PHONY: gofumpt gofumpt: $(GOFUMPT) ## Download gofumpt locally if necessary. -$(GOFUMPT): $(REPO_ROOT_BIN) +$(GOFUMPT): $(REPO_ROOT_BIN) test -s $(GOFUMPT) || GOBIN=$(REPO_ROOT_BIN) go install mvdan.cc/gofumpt@$(GOFUMPT_VERSION) .PHONY: golangci-lint @@ -240,7 +240,7 @@ $(GOLANGCI_LINT): $(REPO_ROOT_BIN) .PHONY: kind kind: $(KIND) ## Download kind locally if necessary. -$(KIND): $(REPO_ROOT_BIN) +$(KIND): $(REPO_ROOT_BIN) test -s $(KIND) || GOBIN=$(REPO_ROOT_BIN) go install sigs.k8s.io/kind@$(KIND_VERSION) .PHONY: go-bindata diff --git a/apiserver/README.md b/apiserver/README.md index 5484058c520..074401c82d0 100644 --- a/apiserver/README.md +++ b/apiserver/README.md @@ -52,13 +52,13 @@ to the api server that would allow Kuberay Serve endpoints to work properly ``` * Install a stable version via Helm repository (only supports KubeRay v0.4.0+) - + ```sh # Install the KubeRay helm repo helm repo add kuberay https://ray-project.github.io/kuberay-helm/ # Install KubeRay APIServer. 
- helm install kuberay-apiserver kuberay/kuberay-apiserver + helm install kuberay-apiserver kuberay/kuberay-apiserver # Check the KubeRay APIServer Pod in `default` namespace kubectl get pods @@ -68,7 +68,7 @@ to the api server that would allow Kuberay Serve endpoints to work properly ``` * Install the nightly version - + ```sh # Step1: Clone KubeRay repository @@ -80,7 +80,7 @@ to the api server that would allow Kuberay Serve endpoints to work properly ``` * Install the current (working branch) version - + ```sh # Step1: Clone KubeRay repository @@ -89,7 +89,7 @@ to the api server that would allow Kuberay Serve endpoints to work properly # Step3: Build docker image, create a local kind cluster and deploy api server (using helm) make docker-image cluster load-image deploy - ``` + ``` ### List the chart @@ -98,8 +98,8 @@ To list the deployments: ```sh helm ls # NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION -# kuberay-apiserver default 1 2023-09-25 10:42:34.267328 +0300 EEST deployed kuberay-apiserver-1.0.0 -# kuberay-operator default 1 2023-09-25 10:41:48.355831 +0300 EEST deployed kuberay-operator-1.0.0 +# kuberay-apiserver default 1 2023-09-25 10:42:34.267328 +0300 EEST deployed kuberay-apiserver-1.0.0 +# kuberay-operator default 1 2023-09-25 10:41:48.355831 +0300 EEST deployed kuberay-operator-1.0.0 ``` ### Uninstall the Chart @@ -157,8 +157,8 @@ The following steps allow you to validate that the KubeRay API Server components - containerPort: 31888 hostPort: 31888 listenAddress: "0.0.0.0" - - containerPort: 31887 - hostPort: 31887 + - containerPort: 31887 + hostPort: 31887 listenAddress: "0.0.0.0" - role: worker image: kindest/node:v1.23.17@sha256:59c989ff8a517a93127d4a536e7014d28e235fb3529d9fba91b3951d461edfdb @@ -738,7 +738,7 @@ Examples: } } ] - } + } ``` #### Get cluster by its name and namespace @@ -908,7 +908,7 @@ POST {{baseUrl}}/apis/v1/namespaces//jobs Examples: * Request - + ```sh curl --silent -X 'POST' \ 
'http://localhost:31888/apis/v1/namespaces/ray-system/jobs' \ @@ -988,7 +988,7 @@ Examples: Start from creating Ray cluster (We assume here that the [template](test/cluster/template/simple) and [configmap](test/job/code.yaml) are already created). * Request - + ```sh curl -X POST 'localhost:31888/apis/v1/namespaces/default/clusters' \ --header 'Content-Type: application/json' \ @@ -1068,11 +1068,11 @@ curl -X POST 'localhost:31888/apis/v1/namespaces/default/clusters' \ "source":"ray-job-code-sample", "items":{ "sample_code.py":"sample_code.py" - } + } } ], "environment":{ - + } }, "workerGroupSpec":[ @@ -1098,7 +1098,7 @@ curl -X POST 'localhost:31888/apis/v1/namespaces/default/clusters' \ } ], "environment":{ - + } } ] @@ -1113,7 +1113,7 @@ curl -X POST 'localhost:31888/apis/v1/namespaces/default/clusters' \ Once the cluster is created, we can create a job to run on it. * Request - + ```sh curl -X POST 'localhost:31888/apis/v1/namespaces/default/jobs' \ --header 'Content-Type: application/json' \ @@ -1121,13 +1121,13 @@ curl -X POST 'localhost:31888/apis/v1/namespaces/default/jobs' \ "name": "job-test", "namespace": "default", "user": "boris", - "version": "2.9.0", + "version": "2.9.0", "entrypoint": "python /home/ray/samples/sample_code.py", "runtimeEnv": "pip:\n - requests==2.26.0\n - pendulum==2.1.2\nenv_vars:\n counter_name: test_counter\n", "jobSubmitter": { "image": "rayproject/ray:2.9.0-py310", "cpu": "400m", - "memory": "150Mi" + "memory": "150Mi" }, "clusterSelector": { "ray.io/cluster": "job-test" @@ -1160,7 +1160,7 @@ job-test-2hhmf 0/1 Completed 0 15s To see job execution results run: ```sh -kubectl logs job-test-2hhmf +kubectl logs job-test-2hhmf ``` And you should get something similar to: @@ -1180,7 +1180,7 @@ And you should get something similar to: 2023-10-18 03:19:52,203 INFO cli.py:292 -- Tailing logs until the job exits (disable with --no-wait): 2023-10-18 03:20:00,014 INFO worker.py:1329 -- Using address 10.244.0.10:6379 set in the environment 
variable RAY_ADDRESS 2023-10-18 03:20:00,014 INFO worker.py:1458 -- Connecting to existing Ray cluster at address: 10.244.0.10:6379... -2023-10-18 03:20:00,032 INFO worker.py:1633 -- Connected to Ray cluster. View the dashboard at 10.244.0.10:8265 +2023-10-18 03:20:00,032 INFO worker.py:1633 -- Connected to Ray cluster. View the dashboard at 10.244.0.10:8265 test_counter got 1 test_counter got 2 test_counter got 3 @@ -1200,7 +1200,7 @@ kubectl delete rayjob job-test ``` * Request - + ```sh curl -X POST 'localhost:31888/apis/v1/namespaces/default/jobs' \ --header 'Content-Type: application/json' \ @@ -1217,7 +1217,7 @@ curl -X POST 'localhost:31888/apis/v1/namespaces/default/jobs' \ "jobSubmitter": { "image": "rayproject/ray:2.9.0-py310" } -}' +}' ``` * Response @@ -1309,11 +1309,11 @@ GET {{baseUrl}}/apis/v1/jobs Examples: * Request: - + ```sh curl --silent -X 'GET' \ 'http://localhost:31888/apis/v1/jobs' \ - -H 'accept: application/json' + -H 'accept: application/json' ``` * Response @@ -1431,7 +1431,7 @@ Examples: ``` * Response - + ```json {} ``` @@ -1505,7 +1505,7 @@ Examples: "metrics-export-port":"8080" }, "environment":{ - + } }, "workerGroupSpec":[ @@ -1520,7 +1520,7 @@ Examples: "node-ip-address":"$MY_POD_IP" }, "environment":{ - + } } ] @@ -1630,7 +1630,7 @@ Examples: }, "createdAt":"2024-01-17T09:31:34Z", "deleteAt":"1969-12-31T23:59:59Z" -} +} ``` #### List all services in a given namespace @@ -1669,7 +1669,7 @@ Examples "metrics-export-port":"8080" }, "environment":{ - + } }, "workerGroupSpec":[ @@ -1684,7 +1684,7 @@ Examples "node-ip-address":"$MY_POD_IP" }, "environment":{ - + } } ] @@ -1944,7 +1944,7 @@ Examples: "metrics-export-port":"8080" }, "environment":{ - + } }, "workerGroupSpec":[ @@ -1959,7 +1959,7 @@ Examples: "node-ip-address":"$MY_POD_IP" }, "environment":{ - + } } ] @@ -2196,7 +2196,7 @@ Examples: ```sh curl --silent -X 'GET' \ 'http://localhost:31888/apis/v1/namespaces/default/services/test-v2' \ - -H 'accept: application/json' + -H 
'accept: application/json' ``` * Response: @@ -2217,7 +2217,7 @@ Examples: "metrics-export-port":"8080" }, "environment":{ - + } }, "workerGroupSpec":[ @@ -2232,7 +2232,7 @@ Examples: "node-ip-address":"$MY_POD_IP" }, "environment":{ - + } } ] @@ -2467,7 +2467,7 @@ Examples: ```sh curl --silent -X 'DELETE' \ 'http://localhost:31888/apis/v1/namespaces/default/services/test-v2' \ - -H 'accept: application/json' + -H 'accept: application/json' ``` * Response diff --git a/apiserver/deploy/base/insecure/kustomization.yaml b/apiserver/deploy/base/insecure/kustomization.yaml index e9dff844f71..1233af4d197 100644 --- a/apiserver/deploy/base/insecure/kustomization.yaml +++ b/apiserver/deploy/base/insecure/kustomization.yaml @@ -6,10 +6,12 @@ namespace: ray-system resources: - apiserver.yaml -commonLabels: - app.kubernetes.io/name: kuberay - app.kubernetes.io/component: kuberay-apiserver images: - name: kuberay/apiserver newName: kuberay/apiserver newTag: nightly +labels: +- includeSelectors: true + pairs: + app.kubernetes.io/component: kuberay-apiserver + app.kubernetes.io/name: kuberay diff --git a/apiserver/deploy/base/secure/kustomization.yaml b/apiserver/deploy/base/secure/kustomization.yaml index 416034988f4..f0f9f15b5a6 100644 --- a/apiserver/deploy/base/secure/kustomization.yaml +++ b/apiserver/deploy/base/secure/kustomization.yaml @@ -6,9 +6,6 @@ namespace: ray-system resources: - apiserver.yaml -commonLabels: - app.kubernetes.io/name: kuberay - app.kubernetes.io/component: kuberay-apiserver images: - name: kuberay/apiserver newName: kuberay/apiserver @@ -16,3 +13,8 @@ images: - name: kuberay/security-proxy newName: kuberay/security-proxy newTag: nightly +labels: +- includeSelectors: true + pairs: + app.kubernetes.io/component: kuberay-apiserver + app.kubernetes.io/name: kuberay diff --git a/apiserver/deploy/local/insecure/kustomization.yaml b/apiserver/deploy/local/insecure/kustomization.yaml index 6bab77bc406..3771703c145 100644 --- 
a/apiserver/deploy/local/insecure/kustomization.yaml +++ b/apiserver/deploy/local/insecure/kustomization.yaml @@ -3,7 +3,11 @@ kind: Kustomization resources: - ../../base/insecure namespace: ray-system -patchesJson6902: +images: +- name: kuberay/apiserver + newName: quay.io/kuberay/apiserver + newTag: latest +patches: - patch: |- - op: replace path: /spec/template/spec/containers/0/imagePullPolicy @@ -12,7 +16,3 @@ patchesJson6902: kind: Deployment name: kuberay-apiserver version: v1 -images: -- name: kuberay/apiserver - newName: kuberay/apiserver - newTag: latest diff --git a/apiserver/deploy/local/secure/kustomization.yaml b/apiserver/deploy/local/secure/kustomization.yaml index 6c8b85d5443..6e597082599 100644 --- a/apiserver/deploy/local/secure/kustomization.yaml +++ b/apiserver/deploy/local/secure/kustomization.yaml @@ -3,7 +3,14 @@ kind: Kustomization resources: - ../../base/secure namespace: ray-system -patchesJson6902: +images: +- name: kuberay/apiserver + newName: kuberay/apiserver + newTag: latest +- name: kuberay/security-proxy + newName: kuberay/security-proxy + newTag: latest +patches: - patch: |- - op: replace path: /spec/template/spec/containers/0/imagePullPolicy @@ -15,10 +22,3 @@ patchesJson6902: kind: Deployment name: kuberay-apiserver version: v1 -images: -- name: kuberay/apiserver - newName: kuberay/apiserver - newTag: latest -- name: kuberay/security-proxy - newName: kuberay/security-proxy - newTag: latest diff --git a/apiserver/deploy/prometheus/api_server_service_monitor.yaml b/apiserver/deploy/prometheus/api_server_service_monitor.yaml index 490b3d10e06..e3af6da7c41 100644 --- a/apiserver/deploy/prometheus/api_server_service_monitor.yaml +++ b/apiserver/deploy/prometheus/api_server_service_monitor.yaml @@ -14,4 +14,4 @@ spec: - default # ns where API server is deployed selector: matchLabels: - app.kubernetes.io/component: kuberay-apiserver \ No newline at end of file + app.kubernetes.io/component: kuberay-apiserver diff --git 
a/apiserver/deploy/prometheus/ray_cluster_pod_monitor.yaml b/apiserver/deploy/prometheus/ray_cluster_pod_monitor.yaml index 850309260c2..9af59b141b7 100644 --- a/apiserver/deploy/prometheus/ray_cluster_pod_monitor.yaml +++ b/apiserver/deploy/prometheus/ray_cluster_pod_monitor.yaml @@ -13,4 +13,4 @@ spec: - default # ns where Ray cluster is deployed selector: matchLabels: - app.kubernetes.io/name: kuberay \ No newline at end of file + app.kubernetes.io/name: kuberay diff --git a/apiserver/go.mod b/apiserver/go.mod index dc0e8ecd61c..ad5f0adcb78 100644 --- a/apiserver/go.mod +++ b/apiserver/go.mod @@ -1,95 +1,94 @@ module github.com/ray-project/kuberay/apiserver -go 1.20 +go 1.22.0 + +toolchain go1.22.4 require ( - github.com/go-openapi/runtime v0.19.31 + github.com/go-openapi/runtime v0.28.0 github.com/pkg/errors v0.9.1 - github.com/prometheus/client_golang v1.16.0 + github.com/prometheus/client_golang v1.19.1 github.com/ray-project/kuberay/proto v0.0.0-20220703232803-3e7749d17400 github.com/ray-project/kuberay/ray-operator v0.0.0-20220703232803-3e7749d17400 - github.com/stretchr/testify v1.8.4 - google.golang.org/grpc v1.59.0 - google.golang.org/protobuf v1.32.0 - k8s.io/api v0.28.4 - k8s.io/apimachinery v0.28.4 - k8s.io/client-go v0.28.4 - k8s.io/klog/v2 v2.100.1 + github.com/stretchr/testify v1.9.0 + google.golang.org/grpc v1.64.0 + google.golang.org/protobuf v1.34.2 + k8s.io/api v0.30.2 + k8s.io/apimachinery v0.30.2 + k8s.io/client-go v0.30.2 + k8s.io/klog/v2 v2.130.1 ) require ( - github.com/dustinkirkland/golang-petname v0.0.0-20230626224747-e794b9370d49 + github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0 github.com/elazarl/go-bindata-assetfs v1.0.1 - github.com/go-logr/logr v1.2.4 + github.com/go-logr/logr v1.4.2 github.com/go-logr/zerologr v1.2.3 - github.com/golang/protobuf v1.5.3 - github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 + github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 github.com/grpc-ecosystem/go-grpc-prometheus 
v1.2.0 - github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 - github.com/rs/zerolog v1.31.0 - google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d - k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 - sigs.k8s.io/controller-runtime v0.16.3 - sigs.k8s.io/yaml v1.3.0 + github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 + github.com/rs/zerolog v1.33.0 + google.golang.org/genproto/googleapis/rpc v0.0.0-20240624140628-dc46fd24d27d + k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 + sigs.k8s.io/controller-runtime v0.18.4 + sigs.k8s.io/yaml v1.4.0 ) require github.com/pmezard/go-difflib v1.0.0 // indirect require ( - github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535 // indirect + github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/emicklei/go-restful/v3 v3.11.0 // indirect - github.com/evanphx/json-patch/v5 v5.6.0 // indirect - github.com/fsnotify/fsnotify v1.6.0 // indirect - github.com/go-openapi/errors v0.19.6 // indirect - github.com/go-openapi/jsonpointer v0.19.6 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/strfmt v0.19.5 // indirect - github.com/go-openapi/swag v0.22.3 // indirect - github.com/go-stack/stack v1.8.0 // indirect + github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/go-openapi/errors v0.22.0 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/strfmt v0.23.0 // indirect + github.com/go-openapi/swag v0.23.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // 
indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect - github.com/google/go-cmp v0.5.9 // indirect + github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/uuid v1.3.1 // indirect - github.com/imdario/mergo v0.3.12 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/imdario/mergo v0.3.16 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.19 // indirect - github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect - github.com/mitchellh/mapstructure v1.4.1 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/prometheus/client_model v0.4.0 // indirect - github.com/prometheus/common v0.44.0 // indirect - github.com/prometheus/procfs v0.10.1 // indirect + github.com/oklog/ulid v1.3.1 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.54.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect github.com/spf13/pflag v1.0.5 // indirect - go.mongodb.org/mongo-driver v1.5.1 // indirect - golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/oauth2 v0.11.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/term v0.16.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/time v0.3.0 // indirect + go.mongodb.org/mongo-driver v1.15.1 // indirect + golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect + golang.org/x/net v0.26.0 
// indirect + golang.org/x/oauth2 v0.21.0 // indirect + golang.org/x/sync v0.7.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/term v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/time v0.5.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240624140628-dc46fd24d27d // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiextensions-apiserver v0.28.4 // indirect - k8s.io/component-base v0.28.4 // indirect - k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 // indirect + k8s.io/apiextensions-apiserver v0.30.2 // indirect + k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) replace ( diff --git a/apiserver/go.sum b/apiserver/go.sum index bfe72faf0ac..7c331204692 100644 --- a/apiserver/go.sum +++ b/apiserver/go.sum @@ -1,783 +1,305 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= -cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= -cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= -cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= -cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= -cloud.google.com/go v0.50.0/go.mod 
h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= -cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= -cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= -cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= -cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= -cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= -cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= -cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= -cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= -cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= -cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= -cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= -cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= -cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= -cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= -cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= -cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= -cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= -cloud.google.com/go/storage v1.6.0/go.mod 
h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= -cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= -cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= -dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= -github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= -github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg= -github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535 h1:4daAzAu0S6Vi7/lbWECcX0j45yZReDZ56BQsrVBOEEY= -github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg= -github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= +github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= +github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= 
+github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= -github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= -github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod 
h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/docker/go-units v0.3.3/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/dustinkirkland/golang-petname v0.0.0-20230626224747-e794b9370d49 h1:6SNWi8VxQeCSwmLuTbEvJd7xvPmdS//zvMBWweZLgck= -github.com/dustinkirkland/golang-petname v0.0.0-20230626224747-e794b9370d49/go.mod h1:V+Qd57rJe8gd4eiGzZyg4h54VLHmYVVw54iMnlAMrF8= +github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0 h1:aYo8nnk3ojoQkP5iErif5Xxv0Mo0Ga/FR5+ffl/7+Nk= +github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0/go.mod h1:8AuBTZBRSFqEYBPYULd+NN474/zZBLP+6WeT5S9xlAc= github.com/elazarl/go-bindata-assetfs v1.0.1 h1:m0kkaHRKEu7tUIUFVwhGGGYClXvyl4RE03qmvRTNfbw= github.com/elazarl/go-bindata-assetfs v1.0.1/go.mod h1:v+YaWX3bdea5J/mo8dSETolEo7R71Vk1u8bnjau5yw4= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= +github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= 
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= -github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= -github.com/evanphx/json-patch/v5 v5.6.0 h1:b91NhWfaz02IuVxO9faSllyAtNXHMPkC5J8sJCLunww= -github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4= -github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= -github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= -github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= -github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt 
v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= -github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo= +github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= +github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= github.com/go-logr/zerologr v1.2.3 h1:up5N9vcH9Xck3jJkXzgyOxozT14R47IyDODz8LM1KSs= github.com/go-logr/zerologr v1.2.3/go.mod h1:BxwGo7y5zgSHYR1BjbnHPyF/5ZjVKfKxAZANVu6E8Ho= -github.com/go-openapi/analysis v0.0.0-20180825180245-b006789cd277/go.mod h1:k70tL6pCuVxPJOHXQ+wIac1FUrvNkHolPie/cLEU6hI= -github.com/go-openapi/analysis v0.17.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= -github.com/go-openapi/analysis v0.18.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= -github.com/go-openapi/analysis 
v0.19.2/go.mod h1:3P1osvZa9jKjb8ed2TPng3f0i/UY9snX6gxi44djMjk= -github.com/go-openapi/analysis v0.19.4/go.mod h1:3P1osvZa9jKjb8ed2TPng3f0i/UY9snX6gxi44djMjk= -github.com/go-openapi/analysis v0.19.5/go.mod h1:hkEAkxagaIvIP7VTn8ygJNkd4kAYON2rCu0v0ObL0AU= -github.com/go-openapi/analysis v0.19.10/go.mod h1:qmhS3VNFxBlquFJ0RGoDtylO9y4pgTAUNE9AEEMdlJQ= -github.com/go-openapi/errors v0.17.0/go.mod h1:LcZQpmvG4wyF5j4IhA73wkLFQg+QJXOQHVjmcZxhka0= -github.com/go-openapi/errors v0.18.0/go.mod h1:LcZQpmvG4wyF5j4IhA73wkLFQg+QJXOQHVjmcZxhka0= -github.com/go-openapi/errors v0.19.2/go.mod h1:qX0BLWsyaKfvhluLejVpVNwNRdXZhEbTA4kxxpKBC94= -github.com/go-openapi/errors v0.19.3/go.mod h1:qX0BLWsyaKfvhluLejVpVNwNRdXZhEbTA4kxxpKBC94= -github.com/go-openapi/errors v0.19.6 h1:xZMThgv5SQ7SMbWtKFkCf9bBdvR2iEyw9k3zGZONuys= -github.com/go-openapi/errors v0.19.6/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M= -github.com/go-openapi/jsonpointer v0.17.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M= -github.com/go-openapi/jsonpointer v0.18.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M= -github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= -github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= -github.com/go-openapi/jsonreference v0.17.0/go.mod h1:g4xxGn04lDIRh0GJb5QlpE3HfopLOL6uZrK/VgnsK9I= -github.com/go-openapi/jsonreference v0.18.0/go.mod h1:g4xxGn04lDIRh0GJb5QlpE3HfopLOL6uZrK/VgnsK9I= -github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= -github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= 
-github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/loads v0.17.0/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= -github.com/go-openapi/loads v0.18.0/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= -github.com/go-openapi/loads v0.19.0/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= -github.com/go-openapi/loads v0.19.2/go.mod h1:QAskZPMX5V0C2gvfkGZzJlINuP7Hx/4+ix5jWFxsNPs= -github.com/go-openapi/loads v0.19.3/go.mod h1:YVfqhUCdahYwR3f3iiwQLhicVRvLlU/WO5WPaZvcvSI= -github.com/go-openapi/loads v0.19.5/go.mod h1:dswLCAdonkRufe/gSUC3gN8nTSaB9uaS2es0x5/IbjY= -github.com/go-openapi/runtime v0.0.0-20180920151709-4f900dc2ade9/go.mod h1:6v9a6LTXWQCdL8k1AO3cvqx5OtZY/Y9wKTgaoP6YRfA= -github.com/go-openapi/runtime v0.19.0/go.mod h1:OwNfisksmmaZse4+gpV3Ne9AyMOlP1lt4sK4FXt0O64= -github.com/go-openapi/runtime v0.19.4/go.mod h1:X277bwSUBxVlCYR3r7xgZZGKVvBd/29gLDlFGtJ8NL4= -github.com/go-openapi/runtime v0.19.15/go.mod h1:dhGWCTKRXlAfGnQG0ONViOZpjfg0m2gUt9nTQPQZuoo= -github.com/go-openapi/runtime v0.19.31 h1:GX+MgBxN12s/tQiHNJpvHDIoZiEXAz6j6Rqg0oJcnpg= -github.com/go-openapi/runtime v0.19.31/go.mod h1:BvrQtn6iVb2QmiVXRsFAm6ZCAZBpbVKFfN6QWCp582M= -github.com/go-openapi/spec v0.17.0/go.mod h1:XkF/MOi14NmjsfZ8VtAKf8pIlbZzyoTvZsdfssdxcBI= -github.com/go-openapi/spec v0.18.0/go.mod h1:XkF/MOi14NmjsfZ8VtAKf8pIlbZzyoTvZsdfssdxcBI= -github.com/go-openapi/spec v0.19.2/go.mod h1:sCxk3jxKgioEJikev4fgkNmwS+3kuYdJtcsZsD5zxMY= -github.com/go-openapi/spec v0.19.3/go.mod h1:FpwSN1ksY1eteniUU7X0N/BgJ7a4WvBFVA8Lj9mJglo= -github.com/go-openapi/spec v0.19.6/go.mod h1:Hm2Jr4jv8G1ciIAo+frC/Ft+rR2kQDh8JHKHb3gWUSk= -github.com/go-openapi/spec v0.19.8/go.mod h1:Hm2Jr4jv8G1ciIAo+frC/Ft+rR2kQDh8JHKHb3gWUSk= -github.com/go-openapi/strfmt v0.17.0/go.mod h1:P82hnJI0CXkErkXi8IKjPbNBM6lV6+5pLP5l494TcyU= -github.com/go-openapi/strfmt v0.18.0/go.mod h1:P82hnJI0CXkErkXi8IKjPbNBM6lV6+5pLP5l494TcyU= 
-github.com/go-openapi/strfmt v0.19.0/go.mod h1:+uW+93UVvGGq2qGaZxdDeJqSAqBqBdl+ZPMF/cC8nDY= -github.com/go-openapi/strfmt v0.19.2/go.mod h1:0yX7dbo8mKIvc3XSKp7MNfxw4JytCfCD6+bY1AVL9LU= -github.com/go-openapi/strfmt v0.19.3/go.mod h1:0yX7dbo8mKIvc3XSKp7MNfxw4JytCfCD6+bY1AVL9LU= -github.com/go-openapi/strfmt v0.19.4/go.mod h1:eftuHTlB/dI8Uq8JJOyRlieZf+WkkxUuk0dgdHXr2Qk= -github.com/go-openapi/strfmt v0.19.5 h1:0utjKrw+BAh8s57XE9Xz8DUBsVvPmRUB6styvl9wWIM= -github.com/go-openapi/strfmt v0.19.5/go.mod h1:eftuHTlB/dI8Uq8JJOyRlieZf+WkkxUuk0dgdHXr2Qk= -github.com/go-openapi/swag v0.17.0/go.mod h1:AByQ+nYG6gQg71GINrmuDXCPWdL640yX49/kXLo40Tg= -github.com/go-openapi/swag v0.18.0/go.mod h1:AByQ+nYG6gQg71GINrmuDXCPWdL640yX49/kXLo40Tg= -github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-openapi/swag v0.19.7/go.mod h1:ao+8BpOPyKdpQz3AOJfbeEVpLmWAvlT1IfTe5McPyhY= -github.com/go-openapi/swag v0.19.9/go.mod h1:ao+8BpOPyKdpQz3AOJfbeEVpLmWAvlT1IfTe5McPyhY= -github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-openapi/validate v0.18.0/go.mod h1:Uh4HdOzKt19xGIGm1qHf/ofbX1YQ4Y+MYsct2VUrAJ4= -github.com/go-openapi/validate v0.19.2/go.mod h1:1tRCw7m3jtI8eNWEEliiAqUIcBztB2KDnRCRMUi7GTA= -github.com/go-openapi/validate v0.19.3/go.mod h1:90Vh6jjkTn+OT1Eefm0ZixWNFjhtOH7vS9k0lo6zwJo= -github.com/go-openapi/validate v0.19.10/go.mod h1:RKEZTUWDkxKQxN2jDT7ZnZi2bhZlbNMAuKvKB+IaGx8= -github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= -github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk= +github.com/go-openapi/analysis v0.23.0 h1:aGday7OWupfMs+LbmLZG4k0MYXIANxcuBTYUC03zFCU= +github.com/go-openapi/analysis v0.23.0/go.mod 
h1:9mz9ZWaSlV8TvjQHLl2mUW2PbZtemkE8yA5v22ohupo= +github.com/go-openapi/errors v0.22.0 h1:c4xY/OLxUBSTiepAg3j/MHuAv5mJhnf53LLMWFB+u/w= +github.com/go-openapi/errors v0.22.0/go.mod h1:J3DmZScxCDufmIMsdOuDHxJbdOGC0xtUynjIx092vXE= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/loads v0.22.0 h1:ECPGd4jX1U6NApCGG1We+uEozOAvXvJSF4nnwHZ8Aco= +github.com/go-openapi/loads v0.22.0/go.mod h1:yLsaTCS92mnSAZX5WWoxszLj0u+Ojl+Zs5Stn1oF+rs= +github.com/go-openapi/runtime v0.28.0 h1:gpPPmWSNGo214l6n8hzdXYhPuJcGtziTOgUpvsFWGIQ= +github.com/go-openapi/runtime v0.28.0/go.mod h1:QN7OzcS+XuYmkQLw05akXk0jRH/eZ3kb18+1KwW9gyc= +github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY= +github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk= +github.com/go-openapi/strfmt v0.23.0 h1:nlUS6BCqcnAk0pyhi9Y+kdDVZdZMHfEKQiS4HaMgO/c= +github.com/go-openapi/strfmt v0.23.0/go.mod h1:NrtIpfKtWIygRkKVsxh7XQMDQW5HKQl6S5ik2elW+K4= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-openapi/validate v0.24.0 h1:LdfDKwNbpB6Vn40xhTdNZAnfLECL81w+VX3BumrGD58= +github.com/go-openapi/validate v0.24.0/go.mod h1:iyeX1sEufmv3nPbBdX3ieNviWnOZaJ1+zquzJEf2BAQ= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/gobuffalo/attrs v0.0.0-20190224210810-a9411de4debd/go.mod h1:4duuawTqi2wkkpB4ePgWMaai6/Kc6WEz83bhFwpHzj0= 
-github.com/gobuffalo/depgen v0.0.0-20190329151759-d478694a28d3/go.mod h1:3STtPUQYuzV0gBVOY3vy6CfMm/ljR4pABfrTeHNLHUY= -github.com/gobuffalo/depgen v0.1.0/go.mod h1:+ifsuy7fhi15RWncXQQKjWS9JPkdah5sZvtHc2RXGlg= -github.com/gobuffalo/envy v1.6.15/go.mod h1:n7DRkBerg/aorDM8kbduw5dN3oXGswK5liaSCx4T5NI= -github.com/gobuffalo/envy v1.7.0/go.mod h1:n7DRkBerg/aorDM8kbduw5dN3oXGswK5liaSCx4T5NI= -github.com/gobuffalo/flect v0.1.0/go.mod h1:d2ehjJqGOH/Kjqcoz+F7jHTBbmDb38yXA598Hb50EGs= -github.com/gobuffalo/flect v0.1.1/go.mod h1:8JCgGVbRjJhVgD6399mQr4fx5rRfGKVzFjbj6RE/9UI= -github.com/gobuffalo/flect v0.1.3/go.mod h1:8JCgGVbRjJhVgD6399mQr4fx5rRfGKVzFjbj6RE/9UI= -github.com/gobuffalo/genny v0.0.0-20190329151137-27723ad26ef9/go.mod h1:rWs4Z12d1Zbf19rlsn0nurr75KqhYp52EAGGxTbBhNk= -github.com/gobuffalo/genny v0.0.0-20190403191548-3ca520ef0d9e/go.mod h1:80lIj3kVJWwOrXWWMRzzdhW3DsrdjILVil/SFKBzF28= -github.com/gobuffalo/genny v0.1.0/go.mod h1:XidbUqzak3lHdS//TPu2OgiFB+51Ur5f7CSnXZ/JDvo= -github.com/gobuffalo/genny v0.1.1/go.mod h1:5TExbEyY48pfunL4QSXxlDOmdsD44RRq4mVZ0Ex28Xk= -github.com/gobuffalo/gitgen v0.0.0-20190315122116-cc086187d211/go.mod h1:vEHJk/E9DmhejeLeNt7UVvlSGv3ziL+djtTr3yyzcOw= -github.com/gobuffalo/gogen v0.0.0-20190315121717-8f38393713f5/go.mod h1:V9QVDIxsgKNZs6L2IYiGR8datgMhB577vzTDqypH360= -github.com/gobuffalo/gogen v0.1.0/go.mod h1:8NTelM5qd8RZ15VjQTFkAW6qOMx5wBbW4dSCS3BY8gg= -github.com/gobuffalo/gogen v0.1.1/go.mod h1:y8iBtmHmGc4qa3urIyo1shvOD8JftTtfcKi+71xfDNE= -github.com/gobuffalo/logger v0.0.0-20190315122211-86e12af44bc2/go.mod h1:QdxcLw541hSGtBnhUc4gaNIXRjiDppFGaDqzbrBd3v8= -github.com/gobuffalo/mapi v1.0.1/go.mod h1:4VAGh89y6rVOvm5A8fKFxYG+wIW6LO1FMTG9hnKStFc= -github.com/gobuffalo/mapi v1.0.2/go.mod h1:4VAGh89y6rVOvm5A8fKFxYG+wIW6LO1FMTG9hnKStFc= -github.com/gobuffalo/packd v0.0.0-20190315124812-a385830c7fc0/go.mod h1:M2Juc+hhDXf/PnmBANFCqx4DM3wRbgDvnVWeG2RIxq4= -github.com/gobuffalo/packd v0.1.0/go.mod h1:M2Juc+hhDXf/PnmBANFCqx4DM3wRbgDvnVWeG2RIxq4= 
-github.com/gobuffalo/packr/v2 v2.0.9/go.mod h1:emmyGweYTm6Kdper+iywB6YK5YzuKchGtJQZ0Odn4pQ= -github.com/gobuffalo/packr/v2 v2.2.0/go.mod h1:CaAwI0GPIAv+5wKLtv8Afwl+Cm78K/I/VCm/3ptBN+0= -github.com/gobuffalo/syncx v0.0.0-20190224160051-33c29581e754/go.mod h1:HhnNqWY95UYwwW3uSASeV7vtgYkT2t16hJgV3AEPUpw= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= -github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= -github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= -github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.1/go.mod 
h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 
-github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod 
h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= -github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= -github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= -github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= 
-github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU= -github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod 
h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/jarcoal/httpmock v1.2.0 h1:gSvTxxFR/MEMfsGrvRbdfpRUMBStovlSRLw0Ep1bwwc= -github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= -github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= -github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= +github.com/jarcoal/httpmock v1.2.0/go.mod h1:oCoTsnAz4+UoOUIf5lJOWV2QQIW5UoeUI6aM2YnWAZk= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= -github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4= -github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA= -github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod 
h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.7.1/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/markbates/oncer v0.0.0-20181203154359-bf2de49a0be2/go.mod h1:Ld9puTsIW75CHf65OeIOkyKbteujpZVXDpWK6YGZbxE= -github.com/markbates/safe v1.0.1/go.mod h1:nAqgmRi7cY2nqMc92/bSEeQA+R4OheNU2T1kNSCBdG0= github.com/mattn/go-colorable v0.1.13 
h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= -github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.3.2/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag= -github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod 
h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= -github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= +github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= +github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= +github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g= +github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= -github.com/pelletier/go-toml v1.4.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo= -github.com/pelletier/go-toml v1.7.0/go.mod h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAvS1LBMMhTE= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= -github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= -github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= -github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= -github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= -github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= -github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.54.0 h1:ZlZy0BgJhTwVZUn7dLOkwCZHUkrAqd3WYtcFCWnM1D8= +github.com/prometheus/common v0.54.0/go.mod h1:/TQgMJP5CuVYveyT7n/0Ix8yLNNXy9yRSkhnLTHPDIQ= +github.com/prometheus/procfs v0.15.1 
h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= -github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= -github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= -github.com/sirupsen/logrus v1.4.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= +github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8= +github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= -github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= -github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= -github.com/xdg-go/scram v1.0.2/go.mod h1:1WAq6h33pAW+iRreB34OORO2Nf7qel3VV3fjBj+hCSs= -github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM= -github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= -github.com/xdg/stringprep v0.0.0-20180714160509-73f8eece6fdc/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= -github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= -github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.1.27/go.mod 
h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -go.mongodb.org/mongo-driver v1.0.3/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= -go.mongodb.org/mongo-driver v1.1.1/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= -go.mongodb.org/mongo-driver v1.3.0/go.mod h1:MSWZXKOynuguX+JSvwP8i+58jYCXxbia8HS3gZBapIE= -go.mongodb.org/mongo-driver v1.3.4/go.mod h1:MSWZXKOynuguX+JSvwP8i+58jYCXxbia8HS3gZBapIE= -go.mongodb.org/mongo-driver v1.5.1 h1:9nOVLGDfOaZ9R0tBumx/BcuqkbFpyTCU2r/Po7A2azI= -go.mongodb.org/mongo-driver v1.5.1/go.mod h1:gRXCHX4Jo7J0IJ1oDQyUxF7jfy19UfxniMS4xxMmUqw= -go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= -go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= -go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= -go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.mongodb.org/mongo-driver v1.15.1 h1:l+RvoUOoMXFmADTLfYDm7On9dRm7p4T80/lEQM+r7HU= +go.mongodb.org/mongo-driver v1.15.1/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod 
h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.25.0 h1:4Hvk6GtkucQ790dqmj7l1eEnRdKm3k3ZUrUMS2d5+5c= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190320223903-b7391e95e576/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190422162423-af44ce270edf/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190530122614-20be4c3c3ed5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190617133340-57b3e21c3d56/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= -golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= -golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= -golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= -golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= -golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= -golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint 
v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= -golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= -golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181005035420-146acd28ed58/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net 
v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190320064053-1272bf9dcd53/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 
-golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.11.0 h1:vPL4xzxBM4niKCW6g9whtaWVXTJf1U5e4aZxxFx/gbU= -golang.org/x/oauth2 v0.11.0/go.mod h1:LdF7O/8bLR/qWK9DrpXmbHLTouvRHK0SgJl0GmDBchk= +golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= +golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190412183630-56d357773e84/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190321052220-f7bb7a8bee54/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190419153524-e8e3143a4f4a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190531175056-4c3a928424d2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys 
v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= -golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= -golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.3.0 
h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190329151228-23e29df326fe/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190416151739-9c9e1878f421/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190420181800-aa740d480789/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190531172133-b3315ee88b7d/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools 
v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190617190820-da514acc4774/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools 
v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= -golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.9.3 
h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= +golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= +golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= -google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= -google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.28.0/go.mod 
h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= -google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= -google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= 
-google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= -google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod 
h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= -google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d h1:VBu5YqKPv6XiJ199exd8Br+Aetz+o08F+PLMnwJQHAY= -google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= -google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d h1:DoPTO70H+bcDXcd39vOqb2viZxgqeBeSGtZ55yZU4/Q= -google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= +google.golang.org/genproto/googleapis/api v0.0.0-20240624140628-dc46fd24d27d h1:Aqf0fiIdUQEj0Gn9mKFFXoQfTTEaNopWpfVyYADxiSg= +google.golang.org/genproto/googleapis/api v0.0.0-20240624140628-dc46fd24d27d/go.mod h1:Od4k8V1LQSizPRUK4OzZ7TBE/20k+jPczUDAEyvn69Y= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240624140628-dc46fd24d27d h1:k3zyW3BYYR30e8v3x0bTDdE9vpYFjZHK+HcyqkrppWk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240624140628-dc46fd24d27d/go.mod 
h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= -google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf 
v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 
-gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -honnef.co/go/tools 
v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.28.4 h1:8ZBrLjwosLl/NYgv1P7EQLqoO8MGQApnbgH8tu3BMzY= -k8s.io/api v0.28.4/go.mod h1:axWTGrY88s/5YE+JSt4uUi6NMM+gur1en2REMR7IRj0= -k8s.io/apiextensions-apiserver v0.28.4 h1:AZpKY/7wQ8n+ZYDtNHbAJBb+N4AXXJvyZx6ww6yAJvU= -k8s.io/apiextensions-apiserver v0.28.4/go.mod h1:pgQIZ1U8eJSMQcENew/0ShUTlePcSGFq6dxSxf2mwPM= -k8s.io/apimachinery v0.28.4 h1:zOSJe1mc+GxuMnFzD4Z/U1wst50X28ZNsn5bhgIIao8= -k8s.io/apimachinery v0.28.4/go.mod h1:wI37ncBvfAoswfq626yPTe6Bz1c22L7uaJ8dho83mgg= -k8s.io/client-go v0.28.4 h1:Np5ocjlZcTrkyRJ3+T3PkXDpe4UpatQxj85+xjaD2wY= -k8s.io/client-go v0.28.4/go.mod h1:0VDZFpgoZfelyP5Wqu0/r/TRYcLYuJ2U1KEeoaPa1N4= -k8s.io/component-base v0.28.4 h1:c/iQLWPdUgI90O+T9TeECg8o7N3YJTiuz2sKxILYcYo= -k8s.io/component-base v0.28.4/go.mod h1:m9hR0uvqXDybiGL2nf/3Lf0MerAfQXzkfWhUY58JUbU= -k8s.io/klog/v2 v2.100.1 h1:7WCHKK6K8fNhTqfBhISHQ97KrnJNFZMcQvKp7gP/tmg= -k8s.io/klog/v2 v2.100.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= -k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 h1:LyMgNKD2P8Wn1iAwQU5OhxCKlKJy0sHc+PcDwFB24dQ= -k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9/go.mod h1:wZK2AVp1uHCp4VamDVgBP2COHZjqD1T68Rf0CM3YjSM= -k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 h1:qY1Ad8PODbnymg2pRbkyMT/ylpTrCM8P2RJ0yroCyIk= -k8s.io/utils v0.0.0-20230406110748-d93618cff8a2/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= -rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= -sigs.k8s.io/controller-runtime v0.16.3 h1:2TuvuokmfXvDUamSx1SuAOO3eTyye+47mJCigwG62c4= -sigs.k8s.io/controller-runtime v0.16.3/go.mod h1:j7bialYoSn142nv9sCOJmQgDXQXxnroFU4VnX/brVJ0= +k8s.io/api v0.30.2 
h1:+ZhRj+28QT4UOH+BKznu4CBgPWgkXO7XAvMcMl0qKvI= +k8s.io/api v0.30.2/go.mod h1:ULg5g9JvOev2dG0u2hig4Z7tQ2hHIuS+m8MNZ+X6EmI= +k8s.io/apiextensions-apiserver v0.30.2 h1:l7Eue2t6QiLHErfn2vwK4KgF4NeDgjQkCXtEbOocKIE= +k8s.io/apiextensions-apiserver v0.30.2/go.mod h1:lsJFLYyK40iguuinsb3nt+Sj6CmodSI4ACDLep1rgjw= +k8s.io/apimachinery v0.30.2 h1:fEMcnBj6qkzzPGSVsAZtQThU62SmQ4ZymlXRC5yFSCg= +k8s.io/apimachinery v0.30.2/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/client-go v0.30.2 h1:sBIVJdojUNPDU/jObC+18tXWcTJVcwyqS9diGdWHk50= +k8s.io/client-go v0.30.2/go.mod h1:JglKSWULm9xlJLx4KCkfLLQ7XwtlbflV6uFFSHTMgVs= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b h1:Q9xmGWBvOGd8UJyccgpYlLosk/JlfP3xQLNkQlHJeXw= +k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b/go.mod h1:UxDHUPsUwTOOxSU+oXURfFBcAS6JwiRXTYqYwfuGowc= +k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak= +k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHvm5BZw= +sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/structured-merge-diff/v4 v4.2.3 h1:PRbqxJClWWYMNV1dhaG4NsibJbArud9kFxnAMREiWFE= -sigs.k8s.io/structured-merge-diff/v4 v4.2.3/go.mod h1:qjx8mGObPmV2aSZepjQjbmb2ihdVs8cGKBraizNC69E= -sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= -sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 
h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/apiserver/hack/kind-cluster-config.yaml b/apiserver/hack/kind-cluster-config.yaml index 53aef5cc7d8..1a6b4f1d0b1 100644 --- a/apiserver/hack/kind-cluster-config.yaml +++ b/apiserver/hack/kind-cluster-config.yaml @@ -13,8 +13,8 @@ nodes: - containerPort: 31888 hostPort: 31888 listenAddress: "0.0.0.0" - - containerPort: 31887 - hostPort: 31887 + - containerPort: 31887 + hostPort: 31887 listenAddress: "0.0.0.0" - role: worker image: kindest/node:v1.23.17@sha256:59c989ff8a517a93127d4a536e7014d28e235fb3529d9fba91b3951d461edfdb diff --git a/apiserver/hack/update-swagger-ui.bash b/apiserver/hack/update-swagger-ui.bash index d1bdc101332..9b603d26d06 100755 --- a/apiserver/hack/update-swagger-ui.bash +++ b/apiserver/hack/update-swagger-ui.bash @@ -12,9 +12,8 @@ fi echo "Downloading '${SWAGGER_UI_TAR_URL}' to update ${TARGET_DIR}" tmp="$(mktemp -d)" #pushd . 
-curl --output-dir ${tmp} --fail --silent --location --remote-header-name --remote-name ${SWAGGER_UI_TAR_URL} +curl --output-dir ${tmp} --fail --silent --location --remote-header-name --remote-name ${SWAGGER_UI_TAR_URL} tar -xzvf ${tmp}/swagger-ui-${SWAGGER_UI_VERSION}.tar.gz -C ${tmp} #popd cp -rv "$tmp/swagger-ui-${SWAGGER_UI_VERSION}/dist/"* "${TARGET_DIR}" rm -rf "$tmp" - diff --git a/apiserver/pkg/client/cluster.go b/apiserver/pkg/client/cluster.go index 123d89aec48..9f78d09d1e0 100644 --- a/apiserver/pkg/client/cluster.go +++ b/apiserver/pkg/client/cluster.go @@ -1,14 +1,13 @@ package client import ( - "time" - - klog "k8s.io/klog/v2" + "k8s.io/klog/v2" "github.com/ray-project/kuberay/apiserver/pkg/util" + "sigs.k8s.io/controller-runtime/pkg/client/config" + rayclient "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned" rayv1 "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/typed/ray/v1" - "sigs.k8s.io/controller-runtime/pkg/client/config" ) type ClusterClientInterface interface { @@ -23,7 +22,7 @@ func (cc RayClusterClient) RayClusterClient(namespace string) rayv1.RayClusterIn return cc.client.RayClusters(namespace) } -func NewRayClusterClientOrFatal(initConnectionTimeout time.Duration, options util.ClientOptions) ClusterClientInterface { +func NewRayClusterClientOrFatal(options util.ClientOptions) ClusterClientInterface { cfg, err := config.GetConfig() if err != nil { klog.Fatalf("Failed to create RayCluster client. 
Error: %v", err) diff --git a/apiserver/pkg/client/job.go b/apiserver/pkg/client/job.go index dbcf3ec63ab..01c10eb0188 100644 --- a/apiserver/pkg/client/job.go +++ b/apiserver/pkg/client/job.go @@ -1,14 +1,13 @@ package client import ( - "time" - - klog "k8s.io/klog/v2" + "k8s.io/klog/v2" "github.com/ray-project/kuberay/apiserver/pkg/util" + "sigs.k8s.io/controller-runtime/pkg/client/config" + rayclient "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned" rayv1 "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/typed/ray/v1" - "sigs.k8s.io/controller-runtime/pkg/client/config" ) type JobClientInterface interface { @@ -23,7 +22,7 @@ func (cc RayJobClient) RayJobClient(namespace string) rayv1.RayJobInterface { return cc.client.RayJobs(namespace) } -func NewRayJobClientOrFatal(initConnectionTimeout time.Duration, options util.ClientOptions) JobClientInterface { +func NewRayJobClientOrFatal(options util.ClientOptions) JobClientInterface { cfg, err := config.GetConfig() if err != nil { klog.Fatalf("Failed to create RayCluster client. Error: %v", err) diff --git a/apiserver/pkg/client/kubernetes.go b/apiserver/pkg/client/kubernetes.go index 1330f1a54c3..b0babfd9e6f 100644 --- a/apiserver/pkg/client/kubernetes.go +++ b/apiserver/pkg/client/kubernetes.go @@ -1,9 +1,7 @@ package client import ( - "time" - - klog "k8s.io/klog/v2" + "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client/config" "github.com/ray-project/kuberay/apiserver/pkg/util" @@ -39,7 +37,7 @@ func (c *KubernetesClient) NamespaceClient() v1.NamespaceInterface { } // CreateKubernetesCoreOrFatal creates a new client for the Kubernetes pod. -func CreateKubernetesCoreOrFatal(initConnectionTimeout time.Duration, options util.ClientOptions) KubernetesClientInterface { +func CreateKubernetesCoreOrFatal(options util.ClientOptions) KubernetesClientInterface { cfg, err := config.GetConfig() if err != nil { klog.Fatalf("Failed to create TokenReview client. 
Error: %v", err) diff --git a/apiserver/pkg/client/service.go b/apiserver/pkg/client/service.go index e543d4bbcfe..a8369f296bc 100644 --- a/apiserver/pkg/client/service.go +++ b/apiserver/pkg/client/service.go @@ -1,14 +1,13 @@ package client import ( - "time" - - klog "k8s.io/klog/v2" + "k8s.io/klog/v2" "github.com/ray-project/kuberay/apiserver/pkg/util" + "sigs.k8s.io/controller-runtime/pkg/client/config" + rayclient "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned" rayv1 "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/typed/ray/v1" - "sigs.k8s.io/controller-runtime/pkg/client/config" ) type ServiceClientInterface interface { @@ -23,7 +22,7 @@ func (cc RayServiceClient) RayServiceClient(namespace string) rayv1.RayServiceIn return cc.client.RayServices(namespace) } -func NewRayServiceClientOrFatal(initConnectionTimeout time.Duration, options util.ClientOptions) ServiceClientInterface { +func NewRayServiceClientOrFatal(options util.ClientOptions) ServiceClientInterface { cfg, err := config.GetConfig() if err != nil { klog.Fatalf("Failed to create RayService client. Error: %v", err) diff --git a/apiserver/pkg/manager/client_manager.go b/apiserver/pkg/manager/client_manager.go index b26546c1912..d9703d2b800 100644 --- a/apiserver/pkg/manager/client_manager.go +++ b/apiserver/pkg/manager/client_manager.go @@ -1,11 +1,9 @@ package manager import ( - "time" - "github.com/ray-project/kuberay/apiserver/pkg/client" "github.com/ray-project/kuberay/apiserver/pkg/util" - klog "k8s.io/klog/v2" + "k8s.io/klog/v2" ) type ClientManagerInterface interface { @@ -52,7 +50,6 @@ func (c *ClientManager) init() { klog.Info("Initializing client manager") // configure configs - initConnectionTimeout := 15 * time.Second defaultKubernetesClientConfig := util.ClientOptions{ QPS: 5, Burst: 10, @@ -63,10 +60,10 @@ func (c *ClientManager) init() { // TODO: Potentially, we may need storage layer clients to help persist the data. // 2. 
kubernetes client initialization - c.clusterClient = client.NewRayClusterClientOrFatal(initConnectionTimeout, defaultKubernetesClientConfig) - c.jobClient = client.NewRayJobClientOrFatal(initConnectionTimeout, defaultKubernetesClientConfig) - c.serviceClient = client.NewRayServiceClientOrFatal(initConnectionTimeout, defaultKubernetesClientConfig) - c.kubernetesClient = client.CreateKubernetesCoreOrFatal(initConnectionTimeout, defaultKubernetesClientConfig) + c.clusterClient = client.NewRayClusterClientOrFatal(defaultKubernetesClientConfig) + c.jobClient = client.NewRayJobClientOrFatal(defaultKubernetesClientConfig) + c.serviceClient = client.NewRayServiceClientOrFatal(defaultKubernetesClientConfig) + c.kubernetesClient = client.CreateKubernetesCoreOrFatal(defaultKubernetesClientConfig) klog.Infof("Client manager initialized successfully") } diff --git a/apiserver/pkg/manager/resource_manager.go b/apiserver/pkg/manager/resource_manager.go index 9ba8fc7e33e..e8b4b231553 100644 --- a/apiserver/pkg/manager/resource_manager.go +++ b/apiserver/pkg/manager/resource_manager.go @@ -7,13 +7,14 @@ import ( "github.com/ray-project/kuberay/apiserver/pkg/model" "github.com/ray-project/kuberay/apiserver/pkg/util" api "github.com/ray-project/kuberay/proto/go_client" - rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - rayv1 "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/typed/ray/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" clientv1 "k8s.io/client-go/kubernetes/typed/core/v1" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayv1 "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/typed/ray/v1" ) const DefaultNamespace = "ray-system" diff --git a/apiserver/pkg/model/converter.go b/apiserver/pkg/model/converter.go old mode 100755 new mode 100644 index 625ae828059..2ee955c3f40 --- 
a/apiserver/pkg/model/converter.go +++ b/apiserver/pkg/model/converter.go @@ -7,11 +7,13 @@ import ( klog "k8s.io/klog/v2" - "github.com/golang/protobuf/ptypes/timestamp" "github.com/ray-project/kuberay/apiserver/pkg/util" api "github.com/ray-project/kuberay/proto/go_client" - rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "google.golang.org/protobuf/types/known/timestamppb" corev1 "k8s.io/api/core/v1" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + pkgutils "github.com/ray-project/kuberay/ray-operator/pkg/utils" ) // Default annotations used by Ray nodes @@ -96,12 +98,14 @@ func FromCrdToApiClusters(clusters []*rayv1api.RayCluster, clusterEventsMap map[ func FromCrdToApiCluster(cluster *rayv1api.RayCluster, events []corev1.Event) *api.Cluster { pbCluster := &api.Cluster{ - Name: cluster.Name, - Namespace: cluster.Namespace, - Version: cluster.Labels[util.RayClusterVersionLabelKey], - User: cluster.Labels[util.RayClusterUserLabelKey], - Environment: api.Cluster_Environment(api.Cluster_Environment_value[cluster.Labels[util.RayClusterEnvironmentLabelKey]]), - CreatedAt: ×tamp.Timestamp{Seconds: cluster.CreationTimestamp.Unix()}, + Name: cluster.Name, + Namespace: cluster.Namespace, + Version: cluster.Labels[util.RayClusterVersionLabelKey], + User: cluster.Labels[util.RayClusterUserLabelKey], + Environment: api.Cluster_Environment( + api.Cluster_Environment_value[cluster.Labels[util.RayClusterEnvironmentLabelKey]], + ), + CreatedAt: ×tamppb.Timestamp{Seconds: cluster.CreationTimestamp.Unix()}, ClusterState: string(cluster.Status.State), } @@ -117,9 +121,9 @@ func FromCrdToApiCluster(cluster *rayv1api.RayCluster, events []corev1.Event) *a clusterEvent := &api.ClusterEvent{ Id: event.Name, Name: fmt.Sprintf("%s-%s", cluster.Labels[util.RayClusterNameLabelKey], event.Name), - CreatedAt: ×tamp.Timestamp{Seconds: event.ObjectMeta.CreationTimestamp.Unix()}, - FirstTimestamp: ×tamp.Timestamp{Seconds: 
event.FirstTimestamp.Unix()}, - LastTimestamp: ×tamp.Timestamp{Seconds: event.LastTimestamp.Unix()}, + CreatedAt: ×tamppb.Timestamp{Seconds: event.ObjectMeta.CreationTimestamp.Unix()}, + FirstTimestamp: ×tamppb.Timestamp{Seconds: event.FirstTimestamp.Unix()}, + LastTimestamp: ×tamppb.Timestamp{Seconds: event.LastTimestamp.Unix()}, Reason: event.Reason, Message: event.Message, Type: event.Type, @@ -242,9 +246,12 @@ func PopulateHeadNodeSpec(spec rayv1api.HeadGroupSpec) *api.HeadGroupSpec { headNodeSpec.EnableIngress = true } - // Here we update environment only for a container named 'ray-head' - if container, _, ok := util.GetContainerByName(spec.Template.Spec.Containers, "ray-head"); ok && len(container.Env) > 0 { - headNodeSpec.Environment = convert_env_variables(container.Env, true) + // Here we update environment and security context only for a container named 'ray-head' + if container, _, ok := util.GetContainerByName(spec.Template.Spec.Containers, "ray-head"); ok { + if len(container.Env) > 0 { + headNodeSpec.Environment = convertEnvVariables(container.Env, true) + } + headNodeSpec.SecurityContext = convertSecurityContext(container.SecurityContext) } if len(spec.Template.Spec.ServiceAccountName) > 1 { @@ -254,6 +261,9 @@ func PopulateHeadNodeSpec(spec rayv1api.HeadGroupSpec) *api.HeadGroupSpec { if len(spec.Template.Spec.ImagePullSecrets) > 0 { headNodeSpec.ImagePullSecret = spec.Template.Spec.ImagePullSecrets[0].Name } + if spec.Template.Spec.Containers[0].ImagePullPolicy == corev1.PullAlways { + headNodeSpec.ImagePullPolicy = "Always" + } return headNodeSpec } @@ -287,9 +297,12 @@ func PopulateWorkerNodeSpec(specs []rayv1api.WorkerGroupSpec) []*api.WorkerGroup workerNodeSpec.Labels = spec.Template.Labels } - // Here we update environment only for a container named 'ray-worker' - if container, _, ok := util.GetContainerByName(spec.Template.Spec.Containers, "ray-worker"); ok && len(container.Env) > 0 { - workerNodeSpec.Environment = 
convert_env_variables(container.Env, false) + // Here we update environment and security context only for a container named 'ray-worker' + if container, _, ok := util.GetContainerByName(spec.Template.Spec.Containers, "ray-worker"); ok { + if len(container.Env) > 0 { + workerNodeSpec.Environment = convertEnvVariables(container.Env, false) + } + workerNodeSpec.SecurityContext = convertSecurityContext(container.SecurityContext) } if len(spec.Template.Spec.ServiceAccountName) > 1 { @@ -299,6 +312,9 @@ func PopulateWorkerNodeSpec(specs []rayv1api.WorkerGroupSpec) []*api.WorkerGroup if len(spec.Template.Spec.ImagePullSecrets) > 0 { workerNodeSpec.ImagePullSecret = spec.Template.Spec.ImagePullSecrets[0].Name } + if spec.Template.Spec.Containers[0].ImagePullPolicy == corev1.PullAlways { + workerNodeSpec.ImagePullPolicy = "Always" + } workerNodeSpecs = append(workerNodeSpecs, workerNodeSpec) } @@ -306,7 +322,26 @@ func PopulateWorkerNodeSpec(specs []rayv1api.WorkerGroupSpec) []*api.WorkerGroup return workerNodeSpecs } -func convert_env_variables(cenv []corev1.EnvVar, header bool) *api.EnvironmentVariables { +func convertSecurityContext(securityCtx *corev1.SecurityContext) *api.SecurityContext { + if securityCtx == nil { + return nil + } + result := &api.SecurityContext{ + Privileged: securityCtx.Privileged, + Capabilities: &api.Capabilities{}, + } + if securityCtx.Capabilities != nil { + for _, cap := range securityCtx.Capabilities.Add { + result.Capabilities.Add = append(result.Capabilities.Add, string(cap)) + } + for _, cap := range securityCtx.Capabilities.Drop { + result.Capabilities.Drop = append(result.Capabilities.Drop, string(cap)) + } + } + return result +} + +func convertEnvVariables(cenv []corev1.EnvVar, header bool) *api.EnvironmentVariables { env := api.EnvironmentVariables{ Values: make(map[string]string), ValuesFrom: make(map[string]*api.EnvValueFrom), @@ -379,11 +414,21 @@ func FromKubeToAPIComputeTemplate(configMap *corev1.ConfigMap) *api.ComputeTempl 
runtime.Memory = uint32(memory) runtime.Gpu = uint32(gpu) runtime.GpuAccelerator = configMap.Data["gpu_accelerator"] - val, ok := configMap.Data["tolerations"] + + val, ok := configMap.Data["extended_resources"] + if ok { + err := json.Unmarshal(pkgutils.ConvertStringToByteSlice(val), &runtime.ExtendedResources) + if err != nil { + klog.Error("failed to unmarshall extended resources for compute template ", runtime.Name, " value ", + runtime.ExtendedResources, " error ", err) + } + } + + val, ok = configMap.Data["tolerations"] if ok { - err := json.Unmarshal([]byte(val), &runtime.Tolerations) + err := json.Unmarshal(pkgutils.ConvertStringToByteSlice(val), &runtime.Tolerations) if err != nil { - klog.Errorf("failed to unmarshall tolerations for compute template ", runtime.Name, " value ", + klog.Error("failed to unmarshall tolerations for compute template ", runtime.Name, " value ", runtime.Tolerations, " error ", err) } } @@ -423,7 +468,7 @@ func FromCrdToApiJob(job *rayv1api.RayJob) (pbJob *api.RayJob) { RuntimeEnv: job.Spec.RuntimeEnvYAML, JobId: job.Status.JobId, ShutdownAfterJobFinishes: job.Spec.ShutdownAfterJobFinishes, - CreatedAt: ×tamp.Timestamp{Seconds: job.CreationTimestamp.Unix()}, + CreatedAt: ×tamppb.Timestamp{Seconds: job.CreationTimestamp.Unix()}, JobStatus: string(job.Status.JobStatus), JobDeploymentStatus: string(job.Status.JobDeploymentStatus), Message: job.Status.Message, @@ -441,7 +486,7 @@ func FromCrdToApiJob(job *rayv1api.RayJob) (pbJob *api.RayJob) { pbJob.TtlSecondsAfterFinished = job.Spec.TTLSecondsAfterFinished if job.DeletionTimestamp != nil { - pbJob.DeleteAt = ×tamp.Timestamp{Seconds: job.DeletionTimestamp.Unix()} + pbJob.DeleteAt = ×tamppb.Timestamp{Seconds: job.DeletionTimestamp.Unix()} } if job.Spec.SubmitterPodTemplate != nil { @@ -465,10 +510,23 @@ func FromCrdToApiJob(job *rayv1api.RayJob) (pbJob *api.RayJob) { pbJob.EntrypointResources = jres } + if jstarttime := job.Status.StartTime; jstarttime != nil { + pbJob.StartTime = 
timestamppb.New(job.Status.StartTime.Time) + } + if jendtime := job.Status.EndTime; jendtime != nil { + pbJob.EndTime = timestamppb.New(job.Status.EndTime.Time) + } + if jclustername := job.Status.RayClusterName; jclustername != "" { + pbJob.RayClusterName = jclustername + } + return pbJob } -func FromCrdToApiServices(services []*rayv1api.RayService, serviceEventsMap map[string][]corev1.Event) []*api.RayService { +func FromCrdToApiServices( + services []*rayv1api.RayService, + serviceEventsMap map[string][]corev1.Event, +) []*api.RayService { apiServices := make([]*api.RayService, 0) for _, service := range services { apiServices = append(apiServices, FromCrdToApiService(service, serviceEventsMap[service.Name])) @@ -489,16 +547,20 @@ func FromCrdToApiService(service *rayv1api.RayService, events []corev1.Event) *a deleteTime = service.DeletionTimestamp.Unix() } pbService := &api.RayService{ - Name: service.Name, - Namespace: service.Namespace, - User: service.Labels[util.RayClusterUserLabelKey], - ServeConfig_V2: service.Spec.ServeConfigV2, - ClusterSpec: PopulateRayClusterSpec(service.Spec.RayClusterSpec), - ServiceUnhealthySecondThreshold: PoplulateUnhealthySecondThreshold(service.Spec.ServiceUnhealthySecondThreshold), - DeploymentUnhealthySecondThreshold: PoplulateUnhealthySecondThreshold(service.Spec.DeploymentUnhealthySecondThreshold), - RayServiceStatus: PoplulateRayServiceStatus(service.Name, service.Status, events), - CreatedAt: ×tamp.Timestamp{Seconds: service.CreationTimestamp.Unix()}, - DeleteAt: ×tamp.Timestamp{Seconds: deleteTime}, + Name: service.Name, + Namespace: service.Namespace, + User: service.Labels[util.RayClusterUserLabelKey], + ServeConfig_V2: service.Spec.ServeConfigV2, + ClusterSpec: PopulateRayClusterSpec(service.Spec.RayClusterSpec), + ServiceUnhealthySecondThreshold: PoplulateUnhealthySecondThreshold( + service.Spec.ServiceUnhealthySecondThreshold, + ), + DeploymentUnhealthySecondThreshold: PoplulateUnhealthySecondThreshold( + 
service.Spec.DeploymentUnhealthySecondThreshold, + ), + RayServiceStatus: PoplulateRayServiceStatus(service.Name, service.Status, events), + CreatedAt: ×tamppb.Timestamp{Seconds: service.CreationTimestamp.Unix()}, + DeleteAt: ×tamppb.Timestamp{Seconds: deleteTime}, } return pbService } @@ -510,7 +572,11 @@ func PoplulateUnhealthySecondThreshold(value *int32) int32 { return *value } -func PoplulateRayServiceStatus(serviceName string, serviceStatus rayv1api.RayServiceStatuses, events []corev1.Event) *api.RayServiceStatus { +func PoplulateRayServiceStatus( + serviceName string, + serviceStatus rayv1api.RayServiceStatuses, + events []corev1.Event, +) *api.RayServiceStatus { status := &api.RayServiceStatus{ RayServiceEvents: PopulateRayServiceEvent(serviceName, events), RayClusterName: serviceStatus.ActiveServiceStatus.RayClusterName, @@ -524,7 +590,9 @@ func PoplulateRayServiceStatus(serviceName string, serviceStatus rayv1api.RaySer return status } -func PopulateServeApplicationStatus(serveApplicationStatuses map[string]rayv1api.AppStatus) []*api.ServeApplicationStatus { +func PopulateServeApplicationStatus( + serveApplicationStatuses map[string]rayv1api.AppStatus, +) []*api.ServeApplicationStatus { appStatuses := make([]*api.ServeApplicationStatus, 0) for appName, appStatus := range serveApplicationStatuses { ds := &api.ServeApplicationStatus{ @@ -538,7 +606,9 @@ func PopulateServeApplicationStatus(serveApplicationStatuses map[string]rayv1api return appStatuses } -func PopulateServeDeploymentStatus(serveDeploymentStatuses map[string]rayv1api.ServeDeploymentStatus) []*api.ServeDeploymentStatus { +func PopulateServeDeploymentStatus( + serveDeploymentStatuses map[string]rayv1api.ServeDeploymentStatus, +) []*api.ServeDeploymentStatus { deploymentStatuses := make([]*api.ServeDeploymentStatus, 0) for deploymentName, deploymentStatus := range serveDeploymentStatuses { ds := &api.ServeDeploymentStatus{ @@ -557,9 +627,9 @@ func PopulateRayServiceEvent(serviceName string, 
events []corev1.Event) []*api.R serviceEvent := &api.RayServiceEvent{ Id: event.Name, Name: fmt.Sprintf("%s-%s", serviceName, event.Name), - CreatedAt: ×tamp.Timestamp{Seconds: event.ObjectMeta.CreationTimestamp.Unix()}, - FirstTimestamp: ×tamp.Timestamp{Seconds: event.FirstTimestamp.Unix()}, - LastTimestamp: ×tamp.Timestamp{Seconds: event.LastTimestamp.Unix()}, + CreatedAt: ×tamppb.Timestamp{Seconds: event.ObjectMeta.CreationTimestamp.Unix()}, + FirstTimestamp: ×tamppb.Timestamp{Seconds: event.FirstTimestamp.Unix()}, + LastTimestamp: ×tamppb.Timestamp{Seconds: event.LastTimestamp.Unix()}, Reason: event.Reason, Message: event.Message, Type: event.Type, diff --git a/apiserver/pkg/model/converter_test.go b/apiserver/pkg/model/converter_test.go index f082eb86362..212103c3e4a 100644 --- a/apiserver/pkg/model/converter_test.go +++ b/apiserver/pkg/model/converter_test.go @@ -4,16 +4,18 @@ import ( "fmt" "reflect" "testing" + "time" util "github.com/ray-project/kuberay/apiserver/pkg/util" api "github.com/ray-project/kuberay/proto/go_client" - rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) var ( @@ -69,8 +71,9 @@ var headSpecTest = rayv1api.HeadGroupSpec{ }, Containers: []corev1.Container{ { - Name: "ray-head", - Image: "blublinsky1/ray310:2.5.0", + Name: "ray-head", + Image: "blublinsky1/ray310:2.5.0", + ImagePullPolicy: "Always", Env: []corev1.EnvVar{ { Name: "AWS_KEY", @@ -116,6 +119,13 @@ var headSpecTest = rayv1api.HeadGroupSpec{ }, }, }, + SecurityContext: &corev1.SecurityContext{ + Capabilities: &corev1.Capabilities{ + Add: []corev1.Capability{ 
+ "SYS_PTRACE", + }, + }, + }, }, }, }, @@ -124,24 +134,26 @@ var headSpecTest = rayv1api.HeadGroupSpec{ var configMapWithoutTolerations = corev1.ConfigMap{ Data: map[string]string{ - "cpu": "4", - "gpu": "0", - "gpu_accelerator": "", - "memory": "8", - "name": "head-node-template", - "namespace": "max", + "cpu": "4", + "gpu": "0", + "gpu_accelerator": "", + "memory": "8", + "extended_resources": "{\"vpc.amazonaws.com/efa\": 32}", + "name": "head-node-template", + "namespace": "max", }, } var configMapWithTolerations = corev1.ConfigMap{ Data: map[string]string{ - "cpu": "4", - "gpu": "0", - "gpu_accelerator": "", - "memory": "8", - "name": "head-node-template", - "namespace": "max", - "tolerations": "[{\"key\":\"blah1\",\"operator\":\"Exists\",\"effect\":\"NoExecute\"}]", + "cpu": "4", + "gpu": "0", + "gpu_accelerator": "", + "memory": "8", + "extended_resources": "{\"vpc.amazonaws.com/efa\": 32}", + "name": "head-node-template", + "namespace": "max", + "tolerations": "[{\"key\":\"blah1\",\"operator\":\"Exists\",\"effect\":\"NoExecute\"}]", }, } @@ -220,6 +232,13 @@ var workerSpecTest = rayv1api.WorkerGroupSpec{ Value: "1", }, }, + SecurityContext: &corev1.SecurityContext{ + Capabilities: &corev1.Capabilities{ + Add: []corev1.Capability{ + "SYS_PTRACE", + }, + }, + }, }, }, }, @@ -255,11 +274,11 @@ var ClusterSpecAutoscalerTest = rayv1api.RayCluster{ WorkerGroupSpecs: []rayv1api.WorkerGroupSpec{ workerSpecTest, }, - EnableInTreeAutoscaling: pointer.Bool(true), + EnableInTreeAutoscaling: ptr.To(true), AutoscalerOptions: &rayv1api.AutoscalerOptions{ - IdleTimeoutSeconds: pointer.Int32(int32(60)), - UpscalingMode: (*rayv1api.UpscalingMode)(pointer.String("Default")), - ImagePullPolicy: (*corev1.PullPolicy)(pointer.String("Always")), + IdleTimeoutSeconds: ptr.To[int32](int32(60)), + UpscalingMode: (*rayv1api.UpscalingMode)(ptr.To("Default")), + ImagePullPolicy: (*corev1.PullPolicy)(ptr.To("Always")), Resources: &corev1.ResourceRequirements{ Requests: 
corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("500m"), @@ -346,6 +365,30 @@ var JobExistingClusterSubmitterTest = rayv1api.RayJob{ }, } +var JobWithOutputTest = rayv1api.RayJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + Labels: map[string]string{ + "ray.io/user": "user", + }, + }, + Spec: rayv1api.RayJobSpec{ + Entrypoint: "python /home/ray/samples/sample_code.py", + RuntimeEnvYAML: "mytest yaml", + TTLSecondsAfterFinished: secondsValue, + RayClusterSpec: &ClusterSpecTest.Spec, + }, + Status: rayv1api.RayJobStatus{ + JobStatus: "RUNNING", + JobDeploymentStatus: "Initializing", + Message: "Job is currently running", + RayClusterName: "raycluster-sample-xxxxx", + StartTime: &metav1.Time{Time: time.Date(2024, 0o7, 25, 0, 0, 0, 0, time.UTC)}, + EndTime: nil, + }, +} + var ServiceV2Test = rayv1api.RayService{ ObjectMeta: metav1.ObjectMeta{ Name: "test", @@ -362,10 +405,10 @@ var ServiceV2Test = rayv1api.RayService{ } var autoscalerOptions = &rayv1api.AutoscalerOptions{ - IdleTimeoutSeconds: pointer.Int32(int32(60)), - UpscalingMode: (*rayv1api.UpscalingMode)(pointer.String("Default")), - Image: pointer.String("Some Image"), - ImagePullPolicy: (*corev1.PullPolicy)(pointer.String("Always")), + IdleTimeoutSeconds: ptr.To[int32](int32(60)), + UpscalingMode: (*rayv1api.UpscalingMode)(ptr.To("Default")), + Image: ptr.To("Some Image"), + ImagePullPolicy: (*corev1.PullPolicy)(ptr.To("Always")), Env: []corev1.EnvVar{ { Name: "n1", @@ -393,13 +436,13 @@ var autoscalerOptions = &rayv1api.AutoscalerOptions{ Name: "vmount1", MountPath: "path1", ReadOnly: false, - MountPropagation: (*corev1.MountPropagationMode)(pointer.String("None")), + MountPropagation: (*corev1.MountPropagationMode)(ptr.To("None")), }, { Name: "vmount2", MountPath: "path2", ReadOnly: true, - MountPropagation: (*corev1.MountPropagationMode)(pointer.String("HostToContainer")), + MountPropagation: (*corev1.MountPropagationMode)(ptr.To("HostToContainer")), }, }, Resources: 
&corev1.ResourceRequirements{ @@ -471,6 +514,9 @@ func TestPopulateHeadNodeSpec(t *testing.T) { if groupSpec.ImagePullSecret != "foo" { t.Errorf("failed to convert image pull secret") } + if groupSpec.ImagePullPolicy != "Always" { + t.Errorf("failed to convert image pull policy") + } if !reflect.DeepEqual(groupSpec.Annotations, expectedAnnotations) { t.Errorf("failed to convert annotations, got %v, expected %v", groupSpec.Annotations, expectedAnnotations) } @@ -480,6 +526,10 @@ func TestPopulateHeadNodeSpec(t *testing.T) { if !reflect.DeepEqual(groupSpec.Environment, expectedHeadEnv) { t.Errorf("failed to convert environment, got %v, expected %v", groupSpec.Environment, expectedHeadEnv) } + // Cannot use deep equal since protobuf locks copying + if groupSpec.SecurityContext == nil || groupSpec.SecurityContext.Capabilities == nil || len(groupSpec.SecurityContext.Capabilities.Add) != 1 { + t.Errorf("failed to convert security context") + } } func TestPopulateWorkerNodeSpec(t *testing.T) { @@ -500,6 +550,9 @@ func TestPopulateWorkerNodeSpec(t *testing.T) { if !reflect.DeepEqual(groupSpec.Environment, expectedEnv) { t.Errorf("failed to convert environment, got %v, expected %v", groupSpec.Environment, expectedEnv) } + if groupSpec.SecurityContext == nil || groupSpec.SecurityContext.Capabilities == nil || len(groupSpec.SecurityContext.Capabilities.Add) != 1 { + t.Errorf("failed to convert security context") + } } func TestAutoscalerOptions(t *testing.T) { @@ -573,10 +626,24 @@ func TestPopulateTemplate(t *testing.T) { t.Errorf("failed to convert config map, got %v, expected %v", tolerationToString(template.Tolerations[0]), tolerationToString(&expectedTolerations)) } + + assert.Equal(t, uint32(4), template.Cpu, "CPU mismatch") + assert.Equal(t, uint32(8), template.Memory, "Memory mismatch") + assert.Equal(t, uint32(0), template.Gpu, "GPU mismatch") + assert.Equal( + t, + map[string]uint32{"vpc.amazonaws.com/efa": 32}, + template.ExtendedResources, + "Extended resources 
mismatch", + ) } func tolerationToString(toleration *api.PodToleration) string { - return "Key: " + toleration.Key + " Operator: " + string(toleration.Operator) + " Effect: " + string(toleration.Effect) + return "Key: " + toleration.Key + " Operator: " + string( + toleration.Operator, + ) + " Effect: " + string( + toleration.Effect, + ) } func TestPopulateJob(t *testing.T) { @@ -608,4 +675,13 @@ func TestPopulateJob(t *testing.T) { assert.Nil(t, job.ClusterSpec) assert.Equal(t, "image", job.JobSubmitter.Image) assert.Equal(t, "2", job.JobSubmitter.Cpu) + + job = FromCrdToApiJob(&JobWithOutputTest) + fmt.Printf("jobWithOutput = %#v\n", job) + assert.Equal(t, time.Date(2024, 0o7, 25, 0, 0, 0, 0, time.UTC), job.StartTime.AsTime()) + assert.Nil(t, job.EndTime) + assert.Equal(t, "RUNNING", job.JobStatus) + assert.Equal(t, "Initializing", job.JobDeploymentStatus) + assert.Equal(t, "Job is currently running", job.Message) + assert.Equal(t, "raycluster-sample-xxxxx", job.RayClusterName) } diff --git a/apiserver/pkg/model/volumes_test.go b/apiserver/pkg/model/volumes_test.go index cd44601217e..2d98df5ce1d 100644 --- a/apiserver/pkg/model/volumes_test.go +++ b/apiserver/pkg/model/volumes_test.go @@ -83,7 +83,7 @@ var podTemplateTest = corev1.PodTemplateSpec{ }, }, Spec: corev1.PersistentVolumeClaimSpec{ - Resources: corev1.ResourceRequirements{ + Resources: corev1.VolumeResourceRequirements{ Requests: corev1.ResourceList{ corev1.ResourceStorage: resource.MustParse("5Gi"), }, diff --git a/apiserver/pkg/server/cluster_server.go b/apiserver/pkg/server/cluster_server.go index 8315c804ecc..a07ca94e644 100644 --- a/apiserver/pkg/server/cluster_server.go +++ b/apiserver/pkg/server/cluster_server.go @@ -3,7 +3,6 @@ package server import ( "context" - "github.com/golang/protobuf/ptypes/empty" "github.com/ray-project/kuberay/apiserver/pkg/manager" "github.com/ray-project/kuberay/apiserver/pkg/model" "github.com/ray-project/kuberay/apiserver/pkg/util" @@ -119,7 +118,7 @@ func (s 
*ClusterServer) ListAllClusters(ctx context.Context, request *api.ListAl // Deletes an Cluster without deleting the Cluster's runs and jobs. To // avoid unexpected behaviors, delete an Cluster's runs and jobs before // deleting the Cluster. -func (s *ClusterServer) DeleteCluster(ctx context.Context, request *api.DeleteClusterRequest) (*empty.Empty, error) { +func (s *ClusterServer) DeleteCluster(ctx context.Context, request *api.DeleteClusterRequest) (*emptypb.Empty, error) { if request.Name == "" { return nil, util.NewInvalidInputError("Cluster name is empty. Please specify a valid value.") } diff --git a/apiserver/pkg/server/ray_job_submission_service_server.go b/apiserver/pkg/server/ray_job_submission_service_server.go index 9652ec0cecd..9723e7dbca5 100644 --- a/apiserver/pkg/server/ray_job_submission_service_server.go +++ b/apiserver/pkg/server/ray_job_submission_service_server.go @@ -12,12 +12,13 @@ import ( "github.com/go-logr/logr" "github.com/go-logr/zerologr" api "github.com/ray-project/kuberay/proto/go_client" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "github.com/rs/zerolog" "google.golang.org/protobuf/types/known/emptypb" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime/schema" "sigs.k8s.io/yaml" + + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) type RayJobSubmissionServiceServerOptions struct { @@ -38,7 +39,7 @@ type RayJobSubmissionServiceServer struct { // Create RayJobSubmissionServiceServer func NewRayJobSubmissionServiceServer(clusterServer *ClusterServer, options *RayJobSubmissionServiceServerOptions) *RayJobSubmissionServiceServer { zl := zerolog.New(os.Stdout).Level(zerolog.DebugLevel) - return &RayJobSubmissionServiceServer{clusterServer: clusterServer, options: options, log: zerologr.New(&zl).WithName("jobsubmissionservice"), dashboardClientFunc: utils.GetRayDashboardClient} + return &RayJobSubmissionServiceServer{clusterServer: clusterServer, options: options, 
log: zerologr.New(&zl).WithName("jobsubmissionservice"), dashboardClientFunc: utils.GetRayDashboardClientFunc(nil, false)} } // Submit Ray job @@ -50,7 +51,10 @@ func (s *RayJobSubmissionServiceServer) SubmitRayJob(ctx context.Context, req *a return nil, err } rayDashboardClient := s.dashboardClientFunc() - rayDashboardClient.InitClient(*url) + // TODO: support proxy subresources in kuberay-apiserver + if err := rayDashboardClient.InitClient(ctx, *url, nil); err != nil { + return nil, err + } request := &utils.RayJobRequest{Entrypoint: req.Jobsubmission.Entrypoint} if req.Jobsubmission.SubmissionId != "" { request.SubmissionId = req.Jobsubmission.SubmissionId @@ -102,7 +106,10 @@ func (s *RayJobSubmissionServiceServer) GetJobDetails(ctx context.Context, req * return nil, err } rayDashboardClient := s.dashboardClientFunc() - rayDashboardClient.InitClient(*url) + // TODO: support proxy subresources in kuberay-apiserver + if err := rayDashboardClient.InitClient(ctx, *url, nil); err != nil { + return nil, err + } nodeInfo, err := rayDashboardClient.GetJobInfo(ctx, req.Submissionid) if err != nil { return nil, err @@ -122,7 +129,10 @@ func (s *RayJobSubmissionServiceServer) GetJobLog(ctx context.Context, req *api. 
return nil, err } rayDashboardClient := s.dashboardClientFunc() - rayDashboardClient.InitClient(*url) + // TODO: support proxy subresources in kuberay-apiserver + if err := rayDashboardClient.InitClient(ctx, *url, nil); err != nil { + return nil, err + } jlog, err := rayDashboardClient.GetJobLog(ctx, req.Submissionid) if err != nil { return nil, err @@ -142,7 +152,10 @@ func (s *RayJobSubmissionServiceServer) ListJobDetails(ctx context.Context, req return nil, err } rayDashboardClient := s.dashboardClientFunc() - rayDashboardClient.InitClient(*url) + // TODO: support proxy subresources in kuberay-apiserver + if err := rayDashboardClient.InitClient(ctx, *url, nil); err != nil { + return nil, err + } nodesInfo, err := rayDashboardClient.ListJobs(ctx) if err != nil { return nil, err @@ -163,7 +176,10 @@ func (s *RayJobSubmissionServiceServer) StopRayJob(ctx context.Context, req *api return nil, err } rayDashboardClient := s.dashboardClientFunc() - rayDashboardClient.InitClient(*url) + // TODO: support proxy subresources in kuberay-apiserver + if err := rayDashboardClient.InitClient(ctx, *url, nil); err != nil { + return nil, err + } err = rayDashboardClient.StopJob(ctx, req.Submissionid) if err != nil { return nil, err @@ -180,7 +196,10 @@ func (s *RayJobSubmissionServiceServer) DeleteRayJob(ctx context.Context, req *a return nil, err } rayDashboardClient := s.dashboardClientFunc() - rayDashboardClient.InitClient(*url) + // TODO: support proxy subresources in kuberay-apiserver + if err := rayDashboardClient.InitClient(ctx, *url, nil); err != nil { + return nil, err + } err = rayDashboardClient.DeleteJob(ctx, req.Submissionid) if err != nil { return nil, err diff --git a/apiserver/pkg/server/validations.go b/apiserver/pkg/server/validations.go index fa041611e95..0a22bdbd7e5 100644 --- a/apiserver/pkg/server/validations.go +++ b/apiserver/pkg/server/validations.go @@ -20,6 +20,10 @@ func ValidateClusterSpec(clusterSpec *api.ClusterSpec) error { if 
len(clusterSpec.HeadGroupSpec.RayStartParams) == 0 { return util.NewInvalidInputError("HeadGroupSpec RayStartParams is empty. Please specify values.") } + if len(clusterSpec.HeadGroupSpec.ImagePullPolicy) > 0 && + clusterSpec.HeadGroupSpec.ImagePullPolicy != "Always" && clusterSpec.HeadGroupSpec.ImagePullPolicy != "IfNotPresent" { + return util.NewInvalidInputError("HeadGroupSpec unsupported value for Image pull policy. Please specify Always or IfNotPresent") + } for index, spec := range clusterSpec.WorkerGroupSpec { if len(spec.GroupName) == 0 { @@ -34,6 +38,9 @@ func ValidateClusterSpec(clusterSpec *api.ClusterSpec) error { if spec.MinReplicas > spec.MaxReplicas { return util.NewInvalidInputError("WorkerNodeSpec %d MinReplica > MaxReplicas. Please specify a valid value.", index) } + if len(spec.ImagePullPolicy) > 0 && spec.ImagePullPolicy != "Always" && spec.ImagePullPolicy != "IfNotPresent" { + return util.NewInvalidInputError("Worker GroupSpec unsupported value for Image pull policy. Please specify Always or IfNotPresent") + } } return nil } diff --git a/apiserver/pkg/server/validations_test.go b/apiserver/pkg/server/validations_test.go index 2611e659b8a..de366d35e91 100644 --- a/apiserver/pkg/server/validations_test.go +++ b/apiserver/pkg/server/validations_test.go @@ -73,6 +73,21 @@ func TestValidateClusterSpec(t *testing.T) { }, expectedError: util.NewInvalidInputError("HeadGroupSpec RayStartParams is empty. Please specify values."), }, + { + name: "A head group with A wrong image pull policy", + clusterSpec: &api.ClusterSpec{ + HeadGroupSpec: &api.HeadGroupSpec{ + ComputeTemplate: "a template", + RayStartParams: map[string]string{ + "dashboard-host": "0.0.0.0", + "metrics-export-port": "8080", + }, + ImagePullPolicy: "foo", + }, + WorkerGroupSpec: []*api.WorkerGroupSpec{}, + }, + expectedError: util.NewInvalidInputError("HeadGroupSpec unsupported value for Image pull policy. 
Please specify Always or IfNotPresent"), + }, { name: "An empty worker group", clusterSpec: &api.ClusterSpec{ diff --git a/apiserver/pkg/util/cluster.go b/apiserver/pkg/util/cluster.go old mode 100755 new mode 100644 index 658ac500dde..e1679ea5695 --- a/apiserver/pkg/util/cluster.go +++ b/apiserver/pkg/util/cluster.go @@ -6,14 +6,14 @@ import ( "fmt" "net" "strconv" - - klog "k8s.io/klog/v2" + "strings" api "github.com/ray-project/kuberay/proto/go_client" - rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) type RayCluster struct { @@ -143,6 +143,25 @@ func buildNodeGroupAnnotations(computeTemplate *api.ComputeTemplate, image strin return annotations } +// Add resource to container +func addResourceToContainer(container *corev1.Container, resourceName string, quantity uint32) { + if quantity == 0 { + return + } + quantityStr := fmt.Sprint(quantity) + resourceQuantity := resource.MustParse(quantityStr) + + if container.Resources.Requests == nil { + container.Resources.Requests = make(corev1.ResourceList) + } + if container.Resources.Limits == nil { + container.Resources.Limits = make(corev1.ResourceList) + } + + container.Resources.Requests[corev1.ResourceName(resourceName)] = resourceQuantity + container.Resources.Limits[corev1.ResourceName(resourceName)] = resourceQuantity +} + // Build head node template func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, spec *api.HeadGroupSpec, computeRuntime *api.ComputeTemplate, enableServeService bool) (*corev1.PodTemplateSpec, error) { image := constructRayImage(RayClusterDefaultImageRepository, imageVersion) @@ -150,6 +169,13 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s image = spec.Image } + // Image pull policy. 
Kubernetes default image pull policy IfNotPresent, so we here only + // Overwrite it if it is Always + imagePullPolicy := corev1.PullIfNotPresent + if len(spec.ImagePullPolicy) > 0 && strings.ToLower(spec.ImagePullPolicy) == "always" { + imagePullPolicy = corev1.PullAlways + } + // calculate resources cpu := fmt.Sprint(computeRuntime.GetCpu()) memory := fmt.Sprintf("%d%s", computeRuntime.GetMemory(), "Gi") @@ -170,8 +196,9 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s Tolerations: []corev1.Toleration{}, Containers: []corev1.Container{ { - Name: "ray-head", - Image: image, + Name: "ray-head", + Image: image, + ImagePullPolicy: imagePullPolicy, Env: []corev1.EnvVar{ { Name: "MY_POD_IP", @@ -212,7 +239,8 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s corev1.ResourceMemory: resource.MustParse(memory), }, }, - VolumeMounts: volMounts, + VolumeMounts: volMounts, + SecurityContext: buildSecurityContext(spec.SecurityContext), }, }, Volumes: vols, @@ -222,15 +250,18 @@ func buildHeadPodTemplate(imageVersion string, envs *api.EnvironmentVariables, s // We are filtering container by name `ray-head`. 
If container with this name does not exist // (should never happen) we are not adding container specific parameters if container, index, ok := GetContainerByName(podTemplateSpec.Spec.Containers, "ray-head"); ok { - if computeRuntime.GetGpu() != 0 { - gpu := computeRuntime.GetGpu() + if gpu := computeRuntime.GetGpu(); gpu != 0 { accelerator := "nvidia.com/gpu" if len(computeRuntime.GetGpuAccelerator()) != 0 { accelerator = computeRuntime.GetGpuAccelerator() } - container.Resources.Requests[corev1.ResourceName(accelerator)] = resource.MustParse(fmt.Sprint(gpu)) - container.Resources.Limits[corev1.ResourceName(accelerator)] = resource.MustParse(fmt.Sprint(gpu)) + addResourceToContainer(&container, accelerator, gpu) } + + for k, v := range computeRuntime.GetExtendedResources() { + addResourceToContainer(&container, k, v) + } + globalEnv := convertEnvironmentVariables(envs) if len(globalEnv) > 0 { container.Env = append(container.Env, globalEnv...) @@ -392,6 +423,13 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables, image = spec.Image } + // Image pull policy. 
Kubernetes default image pull policy IfNotPresent, so we here only + // Overwrite it if it is Always + imagePullPolicy := corev1.PullIfNotPresent + if len(spec.ImagePullPolicy) > 0 && strings.ToLower(spec.ImagePullPolicy) == "always" { + imagePullPolicy = corev1.PullAlways + } + // calculate resources cpu := fmt.Sprint(computeRuntime.GetCpu()) memory := fmt.Sprintf("%d%s", computeRuntime.GetMemory(), "Gi") @@ -412,8 +450,9 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables, Tolerations: []corev1.Toleration{}, Containers: []corev1.Container{ { - Name: "ray-worker", - Image: image, + Name: "ray-worker", + Image: image, + ImagePullPolicy: imagePullPolicy, Env: []corev1.EnvVar{ { Name: "RAY_DISABLE_DOCKER_CPU_WARNING", @@ -446,7 +485,7 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables, ValueFrom: &corev1.EnvVarSource{ ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: "ray-worker", - Resource: "requests.cpu", + Resource: "requests.memory", }, }, }, @@ -455,7 +494,7 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables, ValueFrom: &corev1.EnvVarSource{ ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: "ray-worker", - Resource: "limits.cpu", + Resource: "limits.memory", }, }, }, @@ -500,7 +539,8 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables, corev1.ResourceMemory: resource.MustParse(memory), }, }, - VolumeMounts: volMounts, + VolumeMounts: volMounts, + SecurityContext: buildSecurityContext(spec.SecurityContext), }, }, Volumes: vols, @@ -510,16 +550,16 @@ func buildWorkerPodTemplate(imageVersion string, envs *api.EnvironmentVariables, // We are filtering container by name `ray-worker`. 
If container with this name does not exist // (should never happen) we are not adding container specific parameters if container, index, ok := GetContainerByName(podTemplateSpec.Spec.Containers, "ray-worker"); ok { - if computeRuntime.GetGpu() != 0 { - gpu := computeRuntime.GetGpu() + if gpu := computeRuntime.GetGpu(); gpu != 0 { accelerator := "nvidia.com/gpu" if len(computeRuntime.GetGpuAccelerator()) != 0 { accelerator = computeRuntime.GetGpuAccelerator() } + addResourceToContainer(&container, accelerator, gpu) + } - // need smarter algorithm to filter main container. for example filter by name `ray-worker` - container.Resources.Requests[corev1.ResourceName(accelerator)] = resource.MustParse(fmt.Sprint(gpu)) - container.Resources.Limits[corev1.ResourceName(accelerator)] = resource.MustParse(fmt.Sprint(gpu)) + for k, v := range computeRuntime.GetExtendedResources() { + addResourceToContainer(&container, k, v) } globalEnv := convertEnvironmentVariables(envs) @@ -724,7 +764,7 @@ func buildVols(apiVolumes []*api.Volume) ([]corev1.Volume, error) { }, }, Spec: corev1.PersistentVolumeClaimSpec{ - Resources: corev1.ResourceRequirements{ + Resources: corev1.VolumeResourceRequirements{ Requests: corev1.ResourceList{ corev1.ResourceStorage: resource.MustParse(rayVol.Storage), }, @@ -765,6 +805,26 @@ func buildVols(apiVolumes []*api.Volume) ([]corev1.Volume, error) { return vols, nil } +// Build security context +func buildSecurityContext(securityCtx *api.SecurityContext) *corev1.SecurityContext { + if securityCtx == nil { + return nil + } + result := &corev1.SecurityContext{ + Privileged: securityCtx.Privileged, + Capabilities: &corev1.Capabilities{}, + } + if securityCtx.Capabilities != nil { + for _, cap := range securityCtx.Capabilities.Add { + result.Capabilities.Add = append(result.Capabilities.Add, corev1.Capability(cap)) + } + for _, cap := range securityCtx.Capabilities.Drop { + result.Capabilities.Drop = append(result.Capabilities.Drop, corev1.Capability(cap)) + } 
+ } + return result +} + // Init pointer func intPointer(value int32) *int32 { return &value @@ -782,24 +842,28 @@ func (c *RayCluster) SetAnnotationsToAllTemplates(key string, value string) { // Build compute template func NewComputeTemplate(runtime *api.ComputeTemplate) (*corev1.ConfigMap, error) { + extendedResourcesJSON, err := json.Marshal(runtime.ExtendedResources) + if err != nil { + return nil, fmt.Errorf("failed to marshal extended resources: %v", err) + } + // Create data map dmap := map[string]string{ - "name": runtime.Name, - "namespace": runtime.Namespace, - "cpu": strconv.FormatUint(uint64(runtime.Cpu), 10), - "memory": strconv.FormatUint(uint64(runtime.Memory), 10), - "gpu": strconv.FormatUint(uint64(runtime.Gpu), 10), - "gpu_accelerator": runtime.GpuAccelerator, + "name": runtime.Name, + "namespace": runtime.Namespace, + "cpu": strconv.FormatUint(uint64(runtime.Cpu), 10), + "memory": strconv.FormatUint(uint64(runtime.Memory), 10), + "gpu": strconv.FormatUint(uint64(runtime.Gpu), 10), + "gpu_accelerator": runtime.GpuAccelerator, + "extended_resources": string(extendedResourcesJSON), } // Add tolerations in defined if runtime.Tolerations != nil && len(runtime.Tolerations) > 0 { t, err := json.Marshal(runtime.Tolerations) if err != nil { - klog.Errorf("failed to marshall tolerations ", runtime.Tolerations, " for compute template ", runtime.Name, - " error ", err) - } else { - dmap["tolerations"] = string(t) + return nil, fmt.Errorf("failed to marshal tolerations for compute template %s: %w", runtime.Name, err) } + dmap["tolerations"] = string(t) } config := &corev1.ConfigMap{ @@ -858,9 +922,11 @@ func buildAutoscalerOptions(autoscalerOptions *api.AutoscalerOptions) (*rayv1api if len(autoscalerOptions.Image) > 0 { options.Image = &autoscalerOptions.Image } - if len(autoscalerOptions.ImagePullPolicy) > 0 { - options.ImagePullPolicy = (*corev1.PullPolicy)(&autoscalerOptions.ImagePullPolicy) + if len(autoscalerOptions.ImagePullPolicy) > 0 && 
strings.ToLower(autoscalerOptions.ImagePullPolicy) == "always" { + policy := corev1.PullAlways + options.ImagePullPolicy = &policy } + if autoscalerOptions.Envs != nil { if len(autoscalerOptions.Envs.Values) > 0 { options.Env = make([]corev1.EnvVar, len(autoscalerOptions.Envs.Values)) diff --git a/apiserver/pkg/util/cluster_test.go b/apiserver/pkg/util/cluster_test.go index 2be505bafc0..d2a9661f693 100644 --- a/apiserver/pkg/util/cluster_test.go +++ b/apiserver/pkg/util/cluster_test.go @@ -125,6 +125,7 @@ var testAutoscalerOptions = api.AutoscalerOptions{ var headGroup = api.HeadGroupSpec{ ComputeTemplate: "foo", Image: "bar", + ImagePullPolicy: "Always", ServiceType: "ClusterIP", RayStartParams: map[string]string{ "dashboard-host": "0.0.0.0", @@ -166,12 +167,18 @@ var headGroup = api.HeadGroupSpec{ Labels: map[string]string{ "foo": "bar", }, + SecurityContext: &api.SecurityContext{ + Capabilities: &api.Capabilities{ + Add: []string{"SYS_PTRACE"}, + }, + }, } var workerGroup = api.WorkerGroupSpec{ GroupName: "wg", ComputeTemplate: "foo", Image: "bar", + ImagePullPolicy: "Always", Replicas: 5, MinReplicas: 5, MaxReplicas: 5, @@ -191,6 +198,11 @@ var workerGroup = api.WorkerGroupSpec{ Labels: map[string]string{ "foo": "bar", }, + SecurityContext: &api.SecurityContext{ + Capabilities: &api.Capabilities{ + Add: []string{"SYS_PTRACE"}, + }, + }, } var rayCluster = api.Cluster{ @@ -241,6 +253,22 @@ var template = api.ComputeTemplate{ }, } +var templateWorker = api.ComputeTemplate{ + Name: "", + Namespace: "", + Cpu: 2, + Memory: 8, + Gpu: 4, + ExtendedResources: map[string]uint32{"vpc.amazonaws.com/efa": 32}, + Tolerations: []*api.PodToleration{ + { + Key: "blah1", + Operator: "Exists", + Effect: "NoExecute", + }, + }, +} + var expectedToleration = corev1.Toleration{ Key: "blah1", Operator: "Exists", @@ -305,6 +333,14 @@ var expectedHeadNodeEnv = []corev1.EnvVar{ }, } +var expectedSecurityContext = corev1.SecurityContext{ + Capabilities: &corev1.Capabilities{ + Add: 
[]corev1.Capability{ + "SYS_PTRACE", + }, + }, +} + func TestBuildVolumes(t *testing.T) { targetVolume := corev1.Volume{ Name: testVolume.Name, @@ -349,7 +385,7 @@ func TestBuildVolumes(t *testing.T) { AccessModes: []corev1.PersistentVolumeAccessMode{ corev1.ReadWriteOnce, }, - Resources: corev1.ResourceRequirements{ + Resources: corev1.VolumeResourceRequirements{ Requests: corev1.ResourceList{ corev1.ResourceStorage: resource.MustParse(testEphemeralVolume.Storage), }, @@ -517,6 +553,9 @@ func TestBuildHeadPodTemplate(t *testing.T) { if podSpec.Spec.ImagePullSecrets[0].Name != "foo" { t.Errorf("failed to propagate image pull secret") } + if (string)(podSpec.Spec.Containers[0].ImagePullPolicy) != "Always" { + t.Errorf("failed to propagate image pull policy") + } if len(podSpec.Spec.Containers[0].Env) != 6 { t.Errorf("failed to propagate environment") } @@ -549,6 +588,10 @@ func TestBuildHeadPodTemplate(t *testing.T) { t.Errorf("failed to convert labels, got %v, expected %v", podSpec.Labels, expectedLabels) } + if !reflect.DeepEqual(podSpec.Spec.Containers[0].SecurityContext, &expectedSecurityContext) { + t.Errorf("failed to convert security context, got %v, expected %v", podSpec.Spec.SecurityContext, &expectedSecurityContext) + } + podSpec, err = buildHeadPodTemplate("2.4", &api.EnvironmentVariables{}, &headGroup, &template, true) assert.Nil(t, err) if len(podSpec.Spec.Containers[0].Ports) != 6 { @@ -561,6 +604,7 @@ func TestConvertAutoscalerOptions(t *testing.T) { assert.Nil(t, err) assert.Equal(t, *options.IdleTimeoutSeconds, int32(25)) assert.Equal(t, (string)(*options.UpscalingMode), "Default") + assert.Equal(t, (string)(*options.ImagePullPolicy), "Always") assert.Equal(t, len(options.Env), 1) assert.Equal(t, len(options.EnvFrom), 2) assert.Equal(t, len(options.VolumeMounts), 2) @@ -585,31 +629,40 @@ func TestBuildRayCluster(t *testing.T) { } func TestBuilWorkerPodTemplate(t *testing.T) { - podSpec, err := buildWorkerPodTemplate("2.4", 
&api.EnvironmentVariables{}, &workerGroup, &template) + podSpec, err := buildWorkerPodTemplate("2.4", &api.EnvironmentVariables{}, &workerGroup, &templateWorker) assert.Nil(t, err) - if podSpec.Spec.ServiceAccountName != "account" { - t.Errorf("failed to propagate service account") - } - if podSpec.Spec.ImagePullSecrets[0].Name != "foo" { - t.Errorf("failed to propagate image pull secret") - } - if !containsEnv(podSpec.Spec.Containers[0].Env, "foo", "bar") { - t.Errorf("failed to propagate environment") - } - if len(podSpec.Spec.Tolerations) != 1 { - t.Errorf("failed to propagate tolerations, expected 1, got %d", len(podSpec.Spec.Tolerations)) - } - if !reflect.DeepEqual(podSpec.Spec.Tolerations[0], expectedToleration) { - t.Errorf("failed to propagate annotations, got %v, expected %v", tolerationToString(&podSpec.Spec.Tolerations[0]), - tolerationToString(&expectedToleration)) - } - if val, exists := podSpec.Annotations["foo"]; !exists || val != "bar" { - t.Errorf("failed to convert annotations") - } - if !reflect.DeepEqual(podSpec.Labels, expectedLabels) { - t.Errorf("failed to convert labels, got %v, expected %v", podSpec.Labels, expectedLabels) - } + assert.Equal(t, "account", podSpec.Spec.ServiceAccountName, "failed to propagate service account") + assert.Equal(t, "foo", podSpec.Spec.ImagePullSecrets[0].Name, "failed to propagate image pull secret") + assert.Equal(t, corev1.PullAlways, podSpec.Spec.Containers[0].ImagePullPolicy, "failed to propagate image pull policy") + assert.True(t, containsEnv(podSpec.Spec.Containers[0].Env, "foo", "bar"), "failed to propagate environment") + assert.Len(t, podSpec.Spec.Tolerations, 1, "failed to propagate tolerations") + assert.Equal(t, expectedToleration, podSpec.Spec.Tolerations[0], "failed to propagate tolerations") + assert.Equal(t, "bar", podSpec.Annotations["foo"], "failed to convert annotations") + assert.Equal(t, expectedLabels, podSpec.Labels, "failed to convert labels") + assert.True(t, 
containsEnvValueFrom(podSpec.Spec.Containers[0].Env, "CPU_REQUEST", &corev1.EnvVarSource{ResourceFieldRef: &corev1.ResourceFieldSelector{ContainerName: "ray-worker", Resource: "requests.cpu"}}), "failed to propagate environment variable: CPU_REQUEST") + assert.True(t, containsEnvValueFrom(podSpec.Spec.Containers[0].Env, "CPU_LIMITS", &corev1.EnvVarSource{ResourceFieldRef: &corev1.ResourceFieldSelector{ContainerName: "ray-worker", Resource: "limits.cpu"}}), "failed to propagate environment variable: CPU_LIMITS") + assert.True(t, containsEnvValueFrom(podSpec.Spec.Containers[0].Env, "MEMORY_REQUESTS", &corev1.EnvVarSource{ResourceFieldRef: &corev1.ResourceFieldSelector{ContainerName: "ray-worker", Resource: "requests.memory"}}), "failed to propagate environment variable: MEMORY_REQUESTS") + assert.True(t, containsEnvValueFrom(podSpec.Spec.Containers[0].Env, "MEMORY_LIMITS", &corev1.EnvVarSource{ResourceFieldRef: &corev1.ResourceFieldSelector{ContainerName: "ray-worker", Resource: "limits.memory"}}), "failed to propagate environment variable: MEMORY_LIMITS") + assert.True(t, containsEnvValueFrom(podSpec.Spec.Containers[0].Env, "MY_POD_NAME", &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.name"}}), "failed to propagate environment variable: MY_POD_NAME") + assert.True(t, containsEnvValueFrom(podSpec.Spec.Containers[0].Env, "MY_POD_IP", &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "status.podIP"}}), "failed to propagate environment variable: MY_POD_IP") + assert.Equal(t, &expectedSecurityContext, podSpec.Spec.Containers[0].SecurityContext, "failed to convert security context") + + // Check Resources + container := podSpec.Spec.Containers[0] + resources := container.Resources + + assert.Equal(t, resource.MustParse("2"), resources.Limits[corev1.ResourceCPU], "CPU limit doesn't match") + assert.Equal(t, resource.MustParse("2"), resources.Requests[corev1.ResourceCPU], "CPU request doesn't match") + + assert.Equal(t, 
resource.MustParse("8Gi"), resources.Limits[corev1.ResourceMemory], "Memory limit doesn't match") + assert.Equal(t, resource.MustParse("8Gi"), resources.Requests[corev1.ResourceMemory], "Memory request doesn't match") + + assert.Equal(t, resource.MustParse("4"), resources.Limits["nvidia.com/gpu"], "GPU limit doesn't match") + assert.Equal(t, resource.MustParse("4"), resources.Requests["nvidia.com/gpu"], "GPU request doesn't match") + + assert.Equal(t, resource.MustParse("32"), resources.Limits["vpc.amazonaws.com/efa"], "EFA limit doesn't match") + assert.Equal(t, resource.MustParse("32"), resources.Requests["vpc.amazonaws.com/efa"], "EFA request doesn't match") } func containsEnv(envs []corev1.EnvVar, key string, val string) bool { @@ -621,6 +674,15 @@ func containsEnv(envs []corev1.EnvVar, key string, val string) bool { return false } +func containsEnvValueFrom(envs []corev1.EnvVar, key string, valFrom *corev1.EnvVarSource) bool { + for _, env := range envs { + if env.Name == key && reflect.DeepEqual(env.ValueFrom, valFrom) { + return true + } + } + return false +} + func tolerationToString(toleration *corev1.Toleration) string { return "Key: " + toleration.Key + " Operator: " + string(toleration.Operator) + " Effect: " + string(toleration.Effect) } diff --git a/apiserver/pkg/util/job.go b/apiserver/pkg/util/job.go index 5adac3ae7a8..a0198e7ca46 100644 --- a/apiserver/pkg/util/job.go +++ b/apiserver/pkg/util/job.go @@ -4,10 +4,11 @@ import ( "fmt" api "github.com/ray-project/kuberay/proto/go_client" - rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) type RayJob struct { diff --git a/apiserver/pkg/util/service.go b/apiserver/pkg/util/service.go index a0a13239bf6..a1d69889b63 100644 --- a/apiserver/pkg/util/service.go +++ b/apiserver/pkg/util/service.go @@ -4,8 +4,9 
@@ import ( "errors" api "github.com/ray-project/kuberay/proto/go_client" - rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) type RayService struct { diff --git a/apiserver/test/cluster/cluster/cluster b/apiserver/test/cluster/cluster/cluster index d9436aa05f5..efa533abde6 100644 --- a/apiserver/test/cluster/cluster/cluster +++ b/apiserver/test/cluster/cluster/cluster @@ -46,4 +46,4 @@ curl -X POST 'localhost:8888/apis/v1/namespaces/default/clusters' \ } ] } -}' \ No newline at end of file +}' diff --git a/apiserver/test/cluster/cluster/detachedactor.yaml b/apiserver/test/cluster/cluster/detachedactor.yaml index 95cbe20ede7..45eaea2bfc6 100644 --- a/apiserver/test/cluster/cluster/detachedactor.yaml +++ b/apiserver/test/cluster/cluster/detachedactor.yaml @@ -20,4 +20,4 @@ data: ray.init(namespace="default_namespace") detached_actor = ray.get_actor(sys.argv[1]) - ray.kill(detached_actor) \ No newline at end of file + ray.kill(detached_actor) diff --git a/apiserver/test/cluster/template/simple b/apiserver/test/cluster/template/simple index cbe73ad2e1b..9e766f73bc1 100644 --- a/apiserver/test/cluster/template/simple +++ b/apiserver/test/cluster/template/simple @@ -23,4 +23,3 @@ curl 'localhost:8888/apis/v1/namespaces/default/compute_templates/default-templa # Delete by name curl -X DELETE 'localhost:8888/apis/v1/namespaces/default/compute_templates/default-template' \ --header 'Content-Type: application/json' - diff --git a/apiserver/test/e2e/cluster_server_autoscaler_e2e_test.go b/apiserver/test/e2e/cluster_server_autoscaler_e2e_test.go index 0cc54e7fcd1..836274ca49d 100644 --- a/apiserver/test/e2e/cluster_server_autoscaler_e2e_test.go +++ b/apiserver/test/e2e/cluster_server_autoscaler_e2e_test.go @@ -6,8 +6,9 @@ import ( api "github.com/ray-project/kuberay/proto/go_client" - rayv1api 
"github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/stretchr/testify/require" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) // TestCreateClusterAutoscalerEndpoint sequentially iterates over the create cluster endpoint @@ -48,7 +49,7 @@ func TestCreateClusterAutoscaler(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "boris", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ EnableInTreeAutoscaling: true, diff --git a/apiserver/test/e2e/cluster_server_e2e_test.go b/apiserver/test/e2e/cluster_server_e2e_test.go index 535a9376187..6949865814a 100644 --- a/apiserver/test/e2e/cluster_server_e2e_test.go +++ b/apiserver/test/e2e/cluster_server_e2e_test.go @@ -9,10 +9,11 @@ import ( kuberayHTTP "github.com/ray-project/kuberay/apiserver/pkg/http" api "github.com/ray-project/kuberay/proto/go_client" - rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/util/wait" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) // TestCreateClusterEndpoint sequentially iterates over the create cluster endpoint @@ -41,7 +42,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "3cpo", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ HeadGroupSpec: &api.HeadGroupSpec{ @@ -79,7 +80,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "boris", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ HeadGroupSpec: &api.HeadGroupSpec{ @@ -139,7 +140,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: 
"boris", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ HeadGroupSpec: &api.HeadGroupSpec{ @@ -247,7 +248,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "bullwinkle", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: nil, }, @@ -264,7 +265,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "bullwinkle", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{}, }, @@ -281,7 +282,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "boris", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ HeadGroupSpec: &api.HeadGroupSpec{ @@ -309,7 +310,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "boris", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ HeadGroupSpec: &api.HeadGroupSpec{ @@ -334,7 +335,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "boris", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ HeadGroupSpec: &api.HeadGroupSpec{ @@ -364,7 +365,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "boris", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ HeadGroupSpec: &api.HeadGroupSpec{ @@ -398,7 +399,7 @@ func TestCreateClusterEndpoint(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "boris", - 
Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ HeadGroupSpec: &api.HeadGroupSpec{ diff --git a/apiserver/test/e2e/config_server_e2e_test.go b/apiserver/test/e2e/config_server_e2e_test.go index 86dcd0e6c75..68fa97218ae 100644 --- a/apiserver/test/e2e/config_server_e2e_test.go +++ b/apiserver/test/e2e/config_server_e2e_test.go @@ -207,14 +207,14 @@ func TestGetAllComputeTemplates(t *testing.T) { require.Nil(t, actualRpcStatus, "No RPC status expected") require.NotNil(t, response, "A response is expected") require.NotEmpty(t, response.ComputeTemplates, "A list of compute templates is required") - found_name := false + foundName := false for _, template := range response.ComputeTemplates { if tCtx.GetComputeTemplateName() == template.Name && tCtx.GetNamespaceName() == template.Namespace { - found_name = true + foundName = true break } } - require.Equal(t, found_name, true) + require.Equal(t, foundName, true) } // TestGetTemplatesInNamespace get all compute templates in namespace endpoint @@ -236,14 +236,14 @@ func TestGetTemplatesInNamespace(t *testing.T) { require.Nil(t, actualRpcStatus, "No RPC status expected") require.NotNil(t, response, "A response is expected") require.NotEmpty(t, response.ComputeTemplates, "A list of compute templates is required") - found_name := false + foundName := false for _, template := range response.ComputeTemplates { if tCtx.GetComputeTemplateName() == template.Name && tCtx.GetNamespaceName() == template.Namespace { - found_name = true + foundName = true break } } - require.Equal(t, found_name, true) + require.Equal(t, foundName, true) } // TestDeleteTemplate sequentially iterates over the delete compute template endpoint diff --git a/apiserver/test/e2e/defaults.go b/apiserver/test/e2e/defaults.go new file mode 100644 index 00000000000..c246d172943 --- /dev/null +++ b/apiserver/test/e2e/defaults.go @@ -0,0 +1,6 @@ +package e2e + +const ( + RayVersion = "2.9.0" + RayImage = 
"rayproject/ray:2.9.0" +) diff --git a/apiserver/test/e2e/job_server_e2e_test.go b/apiserver/test/e2e/job_server_e2e_test.go index 12e137edae8..fdff0b35039 100644 --- a/apiserver/test/e2e/job_server_e2e_test.go +++ b/apiserver/test/e2e/job_server_e2e_test.go @@ -12,6 +12,7 @@ import ( kuberayHTTP "github.com/ray-project/kuberay/apiserver/pkg/http" api "github.com/ray-project/kuberay/proto/go_client" + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) @@ -85,7 +86,7 @@ func TestCreateJobWithDisposableClusters(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "natacha", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Entrypoint: "python /home/ray/samples/counter_sample.py", RuntimeEnv: "pip:\n - requests==2.26.0\n - pendulum==2.1.2\nenv_vars:\n counter_name: test_counter\n", ShutdownAfterJobFinishes: true, @@ -104,7 +105,7 @@ func TestCreateJobWithDisposableClusters(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "natacha", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Entrypoint: "python /home/ray/samples/fail_fast.py", ShutdownAfterJobFinishes: true, ClusterSpec: clusterSpec, @@ -195,7 +196,7 @@ func TestCreateJobWithDisposableClusters(t *testing.T) { Namespace: tCtx.GetNamespaceName(), Name: tCtx.GetNextName(), User: "bullwinkle", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Entrypoint: "python /home/ray/samples/counter_sample.py", RuntimeEnv: "pip:\n - requests==2.26.0\n - pendulum==2.1.2\nenv_vars:\n counter_name: test_counter\n", ShutdownAfterJobFinishes: true, @@ -303,14 +304,14 @@ func TestGetAllJobs(t *testing.T) { require.Nil(t, actualRpcStatus, "No RPC status expected") require.NotNil(t, response, "A response is expected") require.NotEmpty(t, response.Jobs, "A list of jobs is required") - found_name := false + foundName := false for _, job := range response.Jobs { if testJobRequest.Job.Name == job.Name && tCtx.GetNamespaceName() == job.Namespace { - 
found_name = true + foundName = true break } } - require.Equal(t, found_name, true) + require.Equal(t, foundName, true) } func TestGetJobsInNamespace(t *testing.T) { @@ -332,15 +333,15 @@ func TestGetJobsInNamespace(t *testing.T) { require.NoError(t, err, "No error expected") require.Nil(t, actualRpcStatus, "No RPC status expected") require.NotNil(t, response, "A response is expected") - require.NotEmpty(t, response.Jobs, "A list of compute templates is required") - found_name := false + require.NotEmpty(t, response.Jobs, "A list of jobs is required") + foundName := false for _, job := range response.Jobs { if testJobRequest.Job.Name == job.Name && tCtx.GetNamespaceName() == job.Namespace { - found_name = true + foundName = true break } } - require.Equal(t, found_name, true) + require.Equal(t, foundName, true) } func TestGetJob(t *testing.T) { @@ -443,7 +444,7 @@ func TestCreateJobWithClusterSelector(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "r2d2", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Entrypoint: "python /home/ray/samples/counter_sample.py", Metadata: map[string]string{}, RuntimeEnv: "pip:\n - requests==2.26.0\n - pendulum==2.1.2\nenv_vars:\n counter_name: test_counter\n", @@ -465,7 +466,7 @@ func TestCreateJobWithClusterSelector(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "r2d2", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Entrypoint: "python /home/ray/samples/fail_fast.py", RuntimeEnv: "pip:\n - requests==2.26.0\n - pendulum==2.1.2\nenv_vars:\n counter_name: test_counter\n", ShutdownAfterJobFinishes: true, @@ -531,7 +532,7 @@ func createTestJob(t *testing.T, tCtx *End2EndTestingContext) *api.CreateRayJobR Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "natacha", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Entrypoint: "python /home/ray/samples/counter_sample.py", RuntimeEnv: "pip:\n - requests==2.26.0\n - 
pendulum==2.1.2\nenv_vars:\n counter_name: test_counter\n", ShutdownAfterJobFinishes: true, diff --git a/apiserver/test/e2e/job_submission_e2e_test.go b/apiserver/test/e2e/job_submission_e2e_test.go index 72c96c362fb..99d7a84445b 100644 --- a/apiserver/test/e2e/job_submission_e2e_test.go +++ b/apiserver/test/e2e/job_submission_e2e_test.go @@ -41,7 +41,7 @@ func TestCreateJobSubmission(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "boris", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), Environment: api.Cluster_DEV, ClusterSpec: &api.ClusterSpec{ HeadGroupSpec: &api.HeadGroupSpec{ diff --git a/apiserver/test/e2e/resources/terminate_detached_actor.py b/apiserver/test/e2e/resources/terminate_detached_actor.py index 3dda416110b..aa252999075 100644 --- a/apiserver/test/e2e/resources/terminate_detached_actor.py +++ b/apiserver/test/e2e/resources/terminate_detached_actor.py @@ -2,4 +2,4 @@ ray.init(namespace="default_namespace") detached_actor = ray.get_actor("detached_actor") -ray.kill(detached_actor) \ No newline at end of file +ray.kill(detached_actor) diff --git a/apiserver/test/e2e/service_server_e2e_test.go b/apiserver/test/e2e/service_server_e2e_test.go index 9f0fd2b7b7b..a5fea34876e 100644 --- a/apiserver/test/e2e/service_server_e2e_test.go +++ b/apiserver/test/e2e/service_server_e2e_test.go @@ -8,10 +8,11 @@ import ( kuberayHTTP "github.com/ray-project/kuberay/apiserver/pkg/http" api "github.com/ray-project/kuberay/proto/go_client" - rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/util/wait" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) // TestServiceServerV2 sequentially iterates over the endpoints of the service endpoints using @@ -59,7 +60,7 @@ func TestCreateServiceV2(t *testing.T) { Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "user1", - Version: 
"2.9.0", + Version: tCtx.GetRayVersion(), ServeConfig_V2: "applications:\n - name: fruit_app\n import_path: fruit.deployment_graph\n route_prefix: /fruit\n runtime_env:\n working_dir: \"https://github.com/ray-project/test_dag/archive/78b4a5da38796123d9f9ffff59bab2792a043e95.zip\"\n deployments:\n - name: MangoStand\n num_replicas: 2\n max_replicas_per_node: 1\n user_config:\n price: 3\n ray_actor_options:\n num_cpus: 0.1\n - name: OrangeStand\n num_replicas: 1\n user_config:\n price: 2\n ray_actor_options:\n num_cpus: 0.1\n - name: PearStand\n num_replicas: 1\n user_config:\n price: 1\n ray_actor_options:\n num_cpus: 0.1\n - name: FruitMarket\n num_replicas: 1\n ray_actor_options:\n num_cpus: 0.1\n - name: math_app\n import_path: conditional_dag.serve_dag\n route_prefix: /calc\n runtime_env:\n working_dir: \"https://github.com/ray-project/test_dag/archive/78b4a5da38796123d9f9ffff59bab2792a043e95.zip\"\n deployments:\n - name: Adder\n num_replicas: 1\n user_config:\n increment: 3\n ray_actor_options:\n num_cpus: 0.1\n - name: Multiplier\n num_replicas: 1\n user_config:\n factor: 5\n ray_actor_options:\n num_cpus: 0.1\n - name: Router\n num_replicas: 1\n", ServiceUnhealthySecondThreshold: 10, DeploymentUnhealthySecondThreshold: 20, @@ -355,7 +356,7 @@ func createTestServiceV2(t *testing.T, tCtx *End2EndTestingContext) *api.CreateR Name: tCtx.GetNextName(), Namespace: tCtx.GetNamespaceName(), User: "user1", - Version: "2.9.0", + Version: tCtx.GetRayVersion(), ServeConfig_V2: "applications:\n - name: fruit_app\n import_path: fruit.deployment_graph\n route_prefix: /fruit\n runtime_env:\n working_dir: \"https://github.com/ray-project/test_dag/archive/78b4a5da38796123d9f9ffff59bab2792a043e95.zip\"\n deployments:\n - name: MangoStand\n num_replicas: 2\n max_replicas_per_node: 1\n user_config:\n price: 3\n ray_actor_options:\n num_cpus: 0.1\n - name: OrangeStand\n num_replicas: 1\n user_config:\n price: 2\n ray_actor_options:\n num_cpus: 0.1\n - name: PearStand\n 
num_replicas: 1\n user_config:\n price: 1\n ray_actor_options:\n num_cpus: 0.1\n - name: FruitMarket\n num_replicas: 1\n ray_actor_options:\n num_cpus: 0.1\n - name: math_app\n import_path: conditional_dag.serve_dag\n route_prefix: /calc\n runtime_env:\n working_dir: \"https://github.com/ray-project/test_dag/archive/78b4a5da38796123d9f9ffff59bab2792a043e95.zip\"\n deployments:\n - name: Adder\n num_replicas: 1\n user_config:\n increment: 3\n ray_actor_options:\n num_cpus: 0.1\n - name: Multiplier\n num_replicas: 1\n user_config:\n factor: 5\n ray_actor_options:\n num_cpus: 0.1\n - name: Router\n num_replicas: 1\n", ServiceUnhealthySecondThreshold: 10, DeploymentUnhealthySecondThreshold: 20, diff --git a/apiserver/test/e2e/types.go b/apiserver/test/e2e/types.go index 53ba67fe154..7072eeb87d7 100644 --- a/apiserver/test/e2e/types.go +++ b/apiserver/test/e2e/types.go @@ -12,8 +12,6 @@ import ( petnames "github.com/dustinkirkland/golang-petname" kuberayHTTP "github.com/ray-project/kuberay/apiserver/pkg/http" api "github.com/ray-project/kuberay/proto/go_client" - rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - rayv1 "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/typed/ray/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/protobuf/proto" @@ -24,6 +22,9 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client/config" + + rayv1api "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayv1 "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/typed/ray/v1" ) // GenericEnd2EndTest struct allows for reuse in setting up and running tests @@ -43,6 +44,7 @@ type End2EndTestingContext struct { k8client *kubernetes.Clientset apiServerBaseURL string rayImage string + rayVersion string namespaceName string computeTemplateName string clusterName string @@ -60,6 +62,7 @@ func 
NewEnd2EndTestingContext(t *testing.T) (*End2EndTestingContext, error) { // ordering is important as there dependencies between field values return newEnd2EndTestingContext(t, withRayImage(), + withRayVersion(), withBaseURL(), withHttpClient(), withContext(), @@ -116,7 +119,7 @@ func withRayImage() contextOption { return func(_ *testing.T, testingContext *End2EndTestingContext) error { rayImage := os.Getenv("E2E_API_SERVER_RAY_IMAGE") if strings.TrimSpace(rayImage) == "" { - rayImage = "rayproject/ray:2.9.0-py310" + rayImage = RayImage + "-py310" } // detect if we are running on arm64 machine, most likely apple silicon // the os name is not checked as it also possible that it might be linux @@ -129,6 +132,17 @@ func withRayImage() contextOption { } } +func withRayVersion() contextOption { + return func(_ *testing.T, testingContext *End2EndTestingContext) error { + rayVersion := os.Getenv("E2E_API_SERVER_RAY_VERSION") + if strings.TrimSpace(rayVersion) == "" { + rayVersion = RayVersion + } + testingContext.rayVersion = rayVersion + return nil + } +} + func withK8sClient() contextOption { return func(t *testing.T, testingContext *End2EndTestingContext) error { cfg, err := config.GetConfig() @@ -209,6 +223,10 @@ func (e2etc *End2EndTestingContext) GetRayImage() string { return e2etc.rayImage } +func (e2etc *End2EndTestingContext) GetRayVersion() string { + return e2etc.rayVersion +} + func (e2etc *End2EndTestingContext) GetRayApiServerClient() *kuberayHTTP.KuberayAPIServerClient { return e2etc.kuberayAPIServerClient } diff --git a/apiserver/test/job/job b/apiserver/test/job/job index e633c9638be..67d4212175d 100644 --- a/apiserver/test/job/job +++ b/apiserver/test/job/job @@ -48,4 +48,4 @@ curl -X POST 'localhost:8888/apis/v1/namespaces/default/jobs' \ } ] } -}' \ No newline at end of file +}' diff --git a/apiserver/test/service/service b/apiserver/test/service/service index 5cf3d68baa1..fd1233fb171 100644 --- a/apiserver/test/service/service +++ 
b/apiserver/test/service/service @@ -7,4 +7,4 @@ curl 'localhost:8888/apis/v1/services' \ --header 'Content-Type: application/json' # Delete service -curl -X DELETE localhost:8888/apis/v1/namespaces/default/services/test-v2 \ No newline at end of file +curl -X DELETE localhost:8888/apis/v1/namespaces/default/services/test-v2 diff --git a/apiserver/test/service/serviceV1 b/apiserver/test/service/serviceV1 index c542c003a55..641c3446ede 100644 --- a/apiserver/test/service/serviceV1 +++ b/apiserver/test/service/serviceV1 @@ -67,4 +67,4 @@ curl -X POST 'localhost:8888/apis/v1/namespaces/default/services' \ } ] } -}' \ No newline at end of file +}' diff --git a/apiserver/test/service/serviceV2 b/apiserver/test/service/serviceV2 index 38947267bde..343011cafc9 100644 --- a/apiserver/test/service/serviceV2 +++ b/apiserver/test/service/serviceV2 @@ -32,4 +32,4 @@ curl -X POST 'localhost:8888/apis/v1/namespaces/default/services' \ } ] } -}' \ No newline at end of file +}' diff --git a/benchmark/memory_benchmark/memory_benchmark.md b/benchmark/memory_benchmark/memory_benchmark.md index 8ff334f1c39..3f502747c70 100644 --- a/benchmark/memory_benchmark/memory_benchmark.md +++ b/benchmark/memory_benchmark/memory_benchmark.md @@ -81,4 +81,4 @@ In addition, the number of custom resources in the Kubernetes cluster does not h * Note that the x-axis "Number of Pods" is the number of Pods that are created rather than running. If the Kubernetes cluster does not have enough computing resources, the GKE Autopilot will add a new Kubernetes node into the cluster. This process may take a few minutes, so some Pods may be pending in the process. -This may be the reason why the memory usage is somewhat throttled. \ No newline at end of file +This may be the reason why the memory usage is somewhat throttled. 
diff --git a/benchmark/memory_benchmark/scripts/ray-cluster.benchmark.yaml.template b/benchmark/memory_benchmark/scripts/ray-cluster.benchmark.yaml.template index 940ca6babd7..f6f8ec66b8a 100644 --- a/benchmark/memory_benchmark/scripts/ray-cluster.benchmark.yaml.template +++ b/benchmark/memory_benchmark/scripts/ray-cluster.benchmark.yaml.template @@ -54,4 +54,4 @@ spec: memory: "1G" volumes: - name: ray-logs - emptyDir: {} \ No newline at end of file + emptyDir: {} diff --git a/benchmark/perf-tests/100-raycluster/config.yaml b/benchmark/perf-tests/100-raycluster/config.yaml new file mode 100644 index 00000000000..b5c30659bd6 --- /dev/null +++ b/benchmark/perf-tests/100-raycluster/config.yaml @@ -0,0 +1,58 @@ +name: kuberay +namespace: + number: 10 +tuningSets: +- name: Uniform100qps + qpsLoad: + qps: 100 +steps: +- name: Start measurements + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 30m + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: ray.io/v1 + kind: RayCluster + labelSelector: app.kubernetes.io/created-by = kuberay-operator + operationTimeout: 120s +- name: Creating Ray clusters + phases: + - namespaceRange: + min: 1 + max: 10 + replicasPerNamespace: 10 + tuningSet: Uniform100qps + objectBundle: + - basename: raycluster + objectTemplatePath: raycluster.yaml + templateFillMap: + Replicas: 3 +- name: Wait for RayClusters ready + measurements: + - Identifier: WaitForRayCluster + Method: Exec + Params: + timeout: 30m + command: + - "bash" + - "common/wait-for-rayclusters.sh" + - "100" +- name: Wait for pods to be running + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: gather +- name: Measure pod startup latency + measurements: + - Identifier: PodStartupLatency + Method: 
PodStartupLatency + Params: + action: gather diff --git a/benchmark/perf-tests/100-raycluster/raycluster.yaml b/benchmark/perf-tests/100-raycluster/raycluster.yaml new file mode 100644 index 00000000000..72d34b8ab3b --- /dev/null +++ b/benchmark/perf-tests/100-raycluster/raycluster.yaml @@ -0,0 +1,57 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: {{.Name}} + labels: + perf-test: ray-cluster +spec: + rayVersion: '2.9.3' + headGroupSpec: + rayStartParams: + dashboard-host: '0.0.0.0' + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.3 + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + resources: + limits: + cpu: "1" + requests: + cpu: "100m" + volumes: + - name: ray-logs + emptyDir: {} + workerGroupSpecs: + - replicas: {{.Replicas}} + minReplicas: 1 + maxReplicas: 10 + # logical group name, for this called small-group, also can be functional + groupName: small-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.9.3 + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + resources: + limits: + cpu: "1" + requests: + cpu: "100m" + volumes: + - name: ray-logs + emptyDir: {} diff --git a/benchmark/perf-tests/100-raycluster/results/junit.xml b/benchmark/perf-tests/100-raycluster/results/junit.xml new file mode 100644 index 00000000000..34c6e3dfb05 --- /dev/null +++ b/benchmark/perf-tests/100-raycluster/results/junit.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/benchmark/perf-tests/100-rayjob/config.yaml b/benchmark/perf-tests/100-rayjob/config.yaml new file mode 100644 index 00000000000..60d44c54d18 --- /dev/null +++ b/benchmark/perf-tests/100-rayjob/config.yaml @@ -0,0 +1,83 @@ +name: kuberay +namespace: + number: 10 +tuningSets: +- name: Uniform100qps + qpsLoad: + qps: 100 +steps: +- name: Start measurements + measurements: + 
- Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 30m + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: ray.io/v1 + kind: RayCluster + labelSelector: app.kubernetes.io/created-by = kuberay-operator + operationTimeout: 120s + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 10m +- name: Creating RayJobs for PyTorch MNIST fine-tuning + phases: + - namespaceRange: + min: 1 + max: 10 + replicasPerNamespace: 10 + tuningSet: Uniform100qps + objectBundle: + - basename: pytorch-mnist + objectTemplatePath: pytorch-mnist-rayjob.yaml + templateFillMap: + Image: "rayproject/ray:2.9.3" # replace with image built from images/ray-pytorch +- name: Creating RayJobs for Ray Data Image Resizing + phases: + - namespaceRange: + min: 1 + max: 10 + replicasPerNamespace: 10 + tuningSet: Uniform100qps + objectBundle: + - basename: ray-data-image-resize + objectTemplatePath: ray-data-image-resize.yaml + templateFillMap: + Image: "rayproject/ray:2.9.3" # replace with image built from images/ray-pytorch +- name: Wait for RayJobs complete + measurements: + - Identifier: WaitForRayJob + Method: Exec + Params: + timeout: 30m + command: + - "bash" + - "common/wait-for-rayjobs.sh" + - "100" +- name: Wait for pods to be running + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: gather + operationTimeout: 10m +- name: Measure pod startup latency + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather +- name: Measure job finished + measurements: + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: gather diff --git 
a/benchmark/perf-tests/100-rayjob/pytorch-mnist-rayjob.yaml b/benchmark/perf-tests/100-rayjob/pytorch-mnist-rayjob.yaml new file mode 100644 index 00000000000..85e8001cbce --- /dev/null +++ b/benchmark/perf-tests/100-rayjob/pytorch-mnist-rayjob.yaml @@ -0,0 +1,53 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: {{.Name}} + labels: + perf-test: rayjob-pytorch-mnist +spec: + shutdownAfterJobFinishes: true + entrypoint: python ray_train_pytorch_mnist.py + runtimeEnvYAML: | + env_vars: + NUM_WORKERS: "2" + CPUS_PER_WORKER: "1" + OMP_NUM_THREADS: "1" # Set OMP_NUM_THREADS to avoid KeyError race condition. + rayClusterSpec: + rayVersion: '2.9.3' + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001
+ name: client + resources: + limits: + memory: "10Gi" + requests: + cpu: "2" + memory: "10Gi" + workerGroupSpecs: + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + groupName: worker-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + limits: + memory: "4Gi" + requests: + cpu: "2" + memory: "4Gi" diff --git a/benchmark/perf-tests/100-rayjob/results/junit.xml b/benchmark/perf-tests/100-rayjob/results/junit.xml new file mode 100644 index 00000000000..947a39867e1 --- /dev/null +++ b/benchmark/perf-tests/100-rayjob/results/junit.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/benchmark/perf-tests/1000-raycluster/config.yaml b/benchmark/perf-tests/1000-raycluster/config.yaml new file mode 100644 index 00000000000..1649ae2c1d8 --- /dev/null +++ b/benchmark/perf-tests/1000-raycluster/config.yaml @@ -0,0 +1,68 @@ +name: kuberay +namespace: + number: 100 +tuningSets: +- name: Uniform100qps + qpsLoad: + qps: 100 +steps: +- name: Start measurements + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 30m + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: ray.io/v1 + kind: RayCluster + labelSelector: app.kubernetes.io/created-by = kuberay-operator + operationTimeout: 120s +- name: Preload Images + measurements: + - Identifier: PreloadImages + Method: Exec + Params: + timeout: 30m + command: + - "bash" + - "common/preload-image.sh" +- name: Creating Ray clusters + phases: + - namespaceRange: + min: 1 + max: 100 + replicasPerNamespace: 10 + tuningSet: Uniform100qps + objectBundle: + - basename: raycluster + objectTemplatePath: raycluster.yaml + templateFillMap: + Replicas: 3 + Image: "rayproject/ray:2.9.3" +- name: Wait for RayClusters ready + measurements: + - Identifier: WaitForRayCluster + Method: 
Exec + Params: + timeout: 30m + command: + - "bash" + - "common/wait-for-rayclusters.sh" + - "1000" +- name: Measure wait for pods to be running + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: gather +- name: Measure pod startup latency + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather diff --git a/benchmark/perf-tests/1000-raycluster/raycluster.yaml b/benchmark/perf-tests/1000-raycluster/raycluster.yaml new file mode 100644 index 00000000000..742891fdd25 --- /dev/null +++ b/benchmark/perf-tests/1000-raycluster/raycluster.yaml @@ -0,0 +1,50 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: {{.Name}} + labels: + perf-test: ray-cluster +spec: + rayVersion: '2.9.3' + headGroupSpec: + serviceType: ClusterIP + rayStartParams: + dashboard-host: '0.0.0.0' + disable-usage-stats: 'true' + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + requests: + cpu: "10m" + volumes: + - name: ray-logs + emptyDir: {} + workerGroupSpecs: + - replicas: {{.Replicas}} + minReplicas: 1 + maxReplicas: 10 + # logical group name, for this called small-group, also can be functional + groupName: small-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + limits: + cpu: "1" + requests: + cpu: "10m" diff --git a/benchmark/perf-tests/1000-raycluster/results/junit.xml b/benchmark/perf-tests/1000-raycluster/results/junit.xml new file mode 100644 index 00000000000..2a945ee9cc6 --- /dev/null +++ b/benchmark/perf-tests/1000-raycluster/results/junit.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/benchmark/perf-tests/1000-rayjob/config.yaml b/benchmark/perf-tests/1000-rayjob/config.yaml new file mode 100644 index 
00000000000..095b164a6f5 --- /dev/null +++ b/benchmark/perf-tests/1000-rayjob/config.yaml @@ -0,0 +1,83 @@ +name: kuberay +namespace: + number: 100 +tuningSets: +- name: Uniform100qps + qpsLoad: + qps: 100 +steps: +- name: Start measurements + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 30m + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: ray.io/v1 + kind: RayCluster + labelSelector: app.kubernetes.io/created-by = kuberay-operator + operationTimeout: 120s + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 10m +- name: Creating RayJobs for PyTorch MNIST fine-tuning + phases: + - namespaceRange: + min: 1 + max: 100 + replicasPerNamespace: 5 + tuningSet: Uniform100qps + objectBundle: + - basename: pytorch-mnist + objectTemplatePath: pytorch-mnist-rayjob.yaml + templateFillMap: + Image: "rayproject/ray:2.9.3" +- name: Creating RayJobs for Ray Data Image Resizing + phases: + - namespaceRange: + min: 1 + max: 100 + replicasPerNamespace: 5 + tuningSet: Uniform100qps + objectBundle: + - basename: ray-data-image-resize + objectTemplatePath: ray-data-image-resize.yaml + templateFillMap: + Image: "rayproject/ray:2.9.3" +- name: Wait for RayJobs complete + measurements: + - Identifier: WaitForRayJob + Method: Exec + Params: + timeout: 30m + command: + - "bash" + - "common/wait-for-rayjobs.sh" + - "500" # 1000 since we deploy two RayJobs with 500 instances each +- name: Measure wait for pods to be running + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: gather + operationTimeout: 10m +- name: Measure pod startup latency + measurements: + - Identifier: PodStartupLatency + Method: 
PodStartupLatency + Params: + action: gather +- name: Measure job finished + measurements: + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: gather diff --git a/benchmark/perf-tests/1000-rayjob/pytorch-mnist-rayjob.yaml b/benchmark/perf-tests/1000-rayjob/pytorch-mnist-rayjob.yaml new file mode 100644 index 00000000000..413e6816a7e --- /dev/null +++ b/benchmark/perf-tests/1000-rayjob/pytorch-mnist-rayjob.yaml @@ -0,0 +1,63 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: {{.Name}} + labels: + perf-test: rayjob-pytorch-mnist +spec: + shutdownAfterJobFinishes: true + entrypoint: python ray_train_pytorch_mnist.py + submitterPodTemplate: + spec: + restartPolicy: Never + containers: + - name: submitter-job + image: {{.Image}} + command: + - "sh" + - "-c" + args: + - | + #!/bin/sh + + ray job logs $RAY_JOB_SUBMISSION_ID --address=http://$RAY_DASHBOARD_ADDRESS --follow || \ + ray job submit --address=http://$RAY_DASHBOARD_ADDRESS --submission-id=$RAY_JOB_SUBMISSION_ID --runtime-env-json '{"env_vars":{"NUM_WORKERS":"2","CPUS_PER_WORKER":"1","OMP_NUM_THREADS":"1"}}' -- python ray_train_pytorch_mnist.py + resources: + requests: + cpu: "10m" + rayClusterSpec: + rayVersion: '2.9.3' + headGroupSpec: + rayStartParams: + disable-usage-stats: 'true' + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + requests: + cpu: "100m" + memory: "4Gi" + workerGroupSpecs: + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + groupName: worker-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + requests: + cpu: "100m" + memory: "4Gi" diff --git a/benchmark/perf-tests/1000-rayjob/ray-data-image-resize.yaml b/benchmark/perf-tests/1000-rayjob/ray-data-image-resize.yaml new file mode 100644 index 00000000000..8c9e2bdc3f3 --- 
/dev/null +++ b/benchmark/perf-tests/1000-rayjob/ray-data-image-resize.yaml @@ -0,0 +1,63 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: {{.Name}} + labels: + perf-test: ray-data-image-resize +spec: + shutdownAfterJobFinishes: true + entrypoint: python ray_data_image_resize.py + submitterPodTemplate: + spec: + restartPolicy: Never + containers: + - name: submitter-job + image: {{.Image}} + command: + - "sh" + - "-c" + args: + - | + #!/bin/sh + + ray job logs $RAY_JOB_SUBMISSION_ID --address=http://$RAY_DASHBOARD_ADDRESS --follow || \ + ray job submit --address=http://$RAY_DASHBOARD_ADDRESS --submission-id=$RAY_JOB_SUBMISSION_ID --runtime-env-json '{"env_vars":{"BUCKET_NAME":"ray-images","BUCKET_PREFIX":"images"}}' -- python ray_data_image_resize.py + resources: + requests: + cpu: "10m" + rayClusterSpec: + rayVersion: '2.9.3' + headGroupSpec: + rayStartParams: + disable-usage-stats: 'true' + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + requests: + cpu: "100m" + memory: "2Gi" + workerGroupSpecs: + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + groupName: worker-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + requests: + cpu: "100m" + memory: "2Gi" diff --git a/benchmark/perf-tests/1000-rayjob/results/junit.xml b/benchmark/perf-tests/1000-rayjob/results/junit.xml new file mode 100644 index 00000000000..2066c09568d --- /dev/null +++ b/benchmark/perf-tests/1000-rayjob/results/junit.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/benchmark/perf-tests/10000-raycluster/config.yaml b/benchmark/perf-tests/10000-raycluster/config.yaml new file mode 100644 index 00000000000..f435dfd7bac --- /dev/null +++ b/benchmark/perf-tests/10000-raycluster/config.yaml @@ -0,0 +1,68 @@ +name: kuberay +namespace: + number: 100 
+tuningSets: +- name: Uniform100qps + qpsLoad: + qps: 100 +steps: +- name: Start measurements + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 30m + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: ray.io/v1 + kind: RayCluster + labelSelector: app.kubernetes.io/created-by = kuberay-operator + operationTimeout: 120s +- name: Preload Images + measurements: + - Identifier: PreloadImages + Method: Exec + Params: + timeout: 30m + command: + - "bash" + - "common/preload-image.sh" +- name: Creating Ray clusters + phases: + - namespaceRange: + min: 1 + max: 100 + replicasPerNamespace: 100 + tuningSet: Uniform100qps + objectBundle: + - basename: raycluster + objectTemplatePath: raycluster.yaml + templateFillMap: + Replicas: 3 + Image: "rayproject/ray:2.9.3" +- name: Wait for RayClusters ready + measurements: + - Identifier: WaitForRayCluster + Method: Exec + Params: + timeout: 30m + command: + - "bash" + - "10000-raycluster/wait-for-rayclusters.sh" + - "10000" +- name: Measure wait for pods to be running + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: gather +- name: Measure pod startup latency + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather diff --git a/benchmark/perf-tests/10000-raycluster/raycluster.yaml b/benchmark/perf-tests/10000-raycluster/raycluster.yaml new file mode 100644 index 00000000000..4a4c37bebd2 --- /dev/null +++ b/benchmark/perf-tests/10000-raycluster/raycluster.yaml @@ -0,0 +1,49 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: {{.Name}} + labels: + perf-test: ray-cluster +spec: + rayVersion: '2.9.3' + headGroupSpec: + serviceType: ClusterIP + rayStartParams: + dashboard-host: '0.0.0.0' + disable-usage-stats: 
'true' + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + requests: + cpu: "10m" + volumes: + - name: ray-logs + emptyDir: {} + workerGroupSpecs: + - replicas: {{.Replicas}} + minReplicas: 1 + maxReplicas: 10 + groupName: small-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + limits: + cpu: "1" + requests: + cpu: "10m" diff --git a/benchmark/perf-tests/10000-raycluster/results/junit.xml b/benchmark/perf-tests/10000-raycluster/results/junit.xml new file mode 100644 index 00000000000..718cf9603a0 --- /dev/null +++ b/benchmark/perf-tests/10000-raycluster/results/junit.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/benchmark/perf-tests/10000-rayjob/config.yaml b/benchmark/perf-tests/10000-rayjob/config.yaml new file mode 100644 index 00000000000..f2b6d6894e1 --- /dev/null +++ b/benchmark/perf-tests/10000-rayjob/config.yaml @@ -0,0 +1,81 @@ +name: kuberay +namespace: + number: 100 +tuningSets: +- name: Uniform100qps + qpsLoad: + qps: 100 +steps: +- name: Start measurements + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 30m + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: ray.io/v1 + kind: RayCluster + labelSelector: app.kubernetes.io/created-by = kuberay-operator + operationTimeout: 120s + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 10m +- name: Creating RayJobs for PyTorch MNIST fine-tuning + phases: + - namespaceRange: + min: 1 + max: 100 + replicasPerNamespace: 50 + tuningSet: 
Uniform100qps + objectBundle: + - basename: pytorch-mnist + objectTemplatePath: pytorch-mnist-rayjob.yaml + templateFillMap: {Image: "rayproject/ray:2.9.3"} # value for the template's Image placeholder +- name: Creating RayJobs for Ray Data Image Resizing + phases: + - namespaceRange: + min: 1 + max: 100 + replicasPerNamespace: 50 + tuningSet: Uniform100qps + objectBundle: + - basename: ray-data-image-resize + objectTemplatePath: ray-data-image-resize.yaml + templateFillMap: {Image: "rayproject/ray:2.9.3"} # value for the template's Image placeholder +- name: Wait for RayJobs complete + measurements: + - Identifier: WaitForRayJob + Method: Exec + Params: + timeout: 60m + command: + - "bash" + - "common/wait-for-rayjobs.sh" + - "5000" # 10000 total since we deploy 2 RayJobs with 5000 replicas each +- name: Measure wait for pods to be running + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: gather + operationTimeout: 10m +- name: Measure pod startup latency + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather +- name: Measure job finished + measurements: + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: gather diff --git a/benchmark/perf-tests/10000-rayjob/pytorch-mnist-rayjob.yaml b/benchmark/perf-tests/10000-rayjob/pytorch-mnist-rayjob.yaml new file mode 100644 index 00000000000..526ca1106ee --- /dev/null +++ b/benchmark/perf-tests/10000-rayjob/pytorch-mnist-rayjob.yaml @@ -0,0 +1,63 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: {{.Name}} + labels: + perf-test: rayjob-pytorch-mnist +spec: + shutdownAfterJobFinishes: true + entrypoint: python ray_train_pytorch_mnist.py + submitterPodTemplate: + spec: + restartPolicy: Never + containers: + - name: submitter-job + image: {{.Image}} + command: + - "sh" + - "-c" + args: + - | + #!/bin/sh + + ray job logs $RAY_JOB_SUBMISSION_ID --address=http://$RAY_DASHBOARD_ADDRESS --follow || \ + ray job submit --address=http://$RAY_DASHBOARD_ADDRESS --submission-id=$RAY_JOB_SUBMISSION_ID
--runtime-env-json '{"env_vars":{"NUM_WORKERS":"2","CPUS_PER_WORKER":"1","OMP_NUM_THREADS":"1"}}' -- python ray_train_pytorch_mnist.py + resources: + requests: + cpu: "10m" + rayClusterSpec: + rayVersion: '2.9.3' + headGroupSpec: + rayStartParams: + disable-usage-stats: 'true' + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + requests: + cpu: "100m" + memory: "2Gi" + workerGroupSpecs: + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + groupName: worker-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + requests: + cpu: "100m" + memory: "2Gi" diff --git a/benchmark/perf-tests/10000-rayjob/ray-data-image-resize.yaml b/benchmark/perf-tests/10000-rayjob/ray-data-image-resize.yaml new file mode 100644 index 00000000000..8c9e2bdc3f3 --- /dev/null +++ b/benchmark/perf-tests/10000-rayjob/ray-data-image-resize.yaml @@ -0,0 +1,63 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: {{.Name}} + labels: + perf-test: ray-data-image-resize +spec: + shutdownAfterJobFinishes: true + entrypoint: python ray_data_image_resize.py + submitterPodTemplate: + spec: + restartPolicy: Never + containers: + - name: submitter-job + image: {{.Image}} + command: + - "sh" + - "-c" + args: + - | + #!/bin/sh + + ray job logs $RAY_JOB_SUBMISSION_ID --address=http://$RAY_DASHBOARD_ADDRESS --follow || \ + ray job submit --address=http://$RAY_DASHBOARD_ADDRESS --submission-id=$RAY_JOB_SUBMISSION_ID --runtime-env-json '{"env_vars":{"BUCKET_NAME":"ray-images","BUCKET_PREFIX":"images"}}' -- python ray_data_image_resize.py + resources: + requests: + cpu: "10m" + rayClusterSpec: + rayVersion: '2.9.3' + headGroupSpec: + rayStartParams: + disable-usage-stats: 'true' + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 
6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + requests: + cpu: "100m" + memory: "2Gi" + workerGroupSpecs: + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + groupName: worker-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + requests: + cpu: "100m" + memory: "2Gi" diff --git a/benchmark/perf-tests/10000-rayjob/results/junit.xml b/benchmark/perf-tests/10000-rayjob/results/junit.xml new file mode 100644 index 00000000000..6a8386d5d73 --- /dev/null +++ b/benchmark/perf-tests/10000-rayjob/results/junit.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/benchmark/perf-tests/5000-raycluster/config.yaml b/benchmark/perf-tests/5000-raycluster/config.yaml new file mode 100644 index 00000000000..b34379ab911 --- /dev/null +++ b/benchmark/perf-tests/5000-raycluster/config.yaml @@ -0,0 +1,68 @@ +name: kuberay +namespace: + number: 100 +tuningSets: +- name: Uniform100qps + qpsLoad: + qps: 100 +steps: +- name: Start measurements + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 30m + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: ray.io/v1 + kind: RayCluster + labelSelector: app.kubernetes.io/created-by = kuberay-operator + operationTimeout: 120s +- name: Preload Images + measurements: + - Identifier: PreloadImages + Method: Exec + Params: + timeout: 30m + command: + - "bash" + - "common/preload-image.sh" +- name: Creating Ray clusters + phases: + - namespaceRange: + min: 1 + max: 100 + replicasPerNamespace: 50 + tuningSet: Uniform100qps + objectBundle: + - basename: raycluster + objectTemplatePath: raycluster.yaml + templateFillMap: + Replicas: 3 + Image: "rayproject/ray:2.9.3" +- name: Wait for RayClusters ready + measurements: 
+ - Identifier: WaitForRayCluster + Method: Exec + Params: + timeout: 30m + command: + - "bash" + - "common/wait-for-rayclusters.sh" + - "5000" +- name: Measure wait for pods to be running + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: gather +- name: Measure pod startup latency + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather diff --git a/benchmark/perf-tests/5000-raycluster/raycluster.yaml b/benchmark/perf-tests/5000-raycluster/raycluster.yaml new file mode 100644 index 00000000000..4a4c37bebd2 --- /dev/null +++ b/benchmark/perf-tests/5000-raycluster/raycluster.yaml @@ -0,0 +1,49 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: {{.Name}} + labels: + perf-test: ray-cluster +spec: + rayVersion: '2.9.3' + headGroupSpec: + serviceType: ClusterIP + rayStartParams: + dashboard-host: '0.0.0.0' + disable-usage-stats: 'true' + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + requests: + cpu: "10m" + volumes: + - name: ray-logs + emptyDir: {} + workerGroupSpecs: + - replicas: {{.Replicas}} + minReplicas: 1 + maxReplicas: 10 + groupName: small-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + limits: + cpu: "1" + requests: + cpu: "10m" diff --git a/benchmark/perf-tests/5000-raycluster/results/junit.xml b/benchmark/perf-tests/5000-raycluster/results/junit.xml new file mode 100644 index 00000000000..500bc3b386f --- /dev/null +++ b/benchmark/perf-tests/5000-raycluster/results/junit.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/benchmark/perf-tests/5000-rayjob/config.yaml b/benchmark/perf-tests/5000-rayjob/config.yaml new file mode 100644 index 00000000000..1c900641f14 --- /dev/null 
+++ b/benchmark/perf-tests/5000-rayjob/config.yaml @@ -0,0 +1,81 @@ +name: kuberay +namespace: + number: 100 +tuningSets: +- name: Uniform100qps + qpsLoad: + qps: 100 +steps: +- name: Start measurements + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 30m + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: start + apiVersion: ray.io/v1 + kind: RayCluster + labelSelector: app.kubernetes.io/created-by = kuberay-operator + operationTimeout: 120s + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: start + labelSelector: app.kubernetes.io/created-by = kuberay-operator + threshold: 10m +- name: Creating RayJobs for PyTorch MNIST fine-tuning + phases: + - namespaceRange: + min: 1 + max: 100 + replicasPerNamespace: 25 + tuningSet: Uniform100qps + objectBundle: + - basename: pytorch-mnist + objectTemplatePath: pytorch-mnist-rayjob.yaml + Image: "rayproject/ray:2.9.3" +- name: Creating RayJobs for Ray Data Image Resizing + phases: + - namespaceRange: + min: 1 + max: 100 + replicasPerNamespace: 25 + tuningSet: Uniform100qps + objectBundle: + - basename: ray-data-image-resize + objectTemplatePath: ray-data-image-resize.yaml + Image: "rayproject/ray:2.9.3" +- name: Wait for RayJobs complete + measurements: + - Identifier: WaitForRayJob + Method: Exec + Params: + timeout: 60m + command: + - "bash" + - "common/wait-for-rayjobs.sh" + - "2500" # total 5000 since we deploy 2 RayJobs with 2500 instances each +- name: Measure wait for pods to be running + measurements: + - Identifier: WaitForControlledPodsRunning + Method: WaitForControlledPodsRunning + Params: + action: gather + operationTimeout: 10m +- name: Measure pod startup latency + measurements: + - Identifier: PodStartupLatency + Method: PodStartupLatency + Params: + action: gather +- name: Measure job finished 
+ measurements: + - Identifier: JobLifecycleLatency + Method: JobLifecycleLatency + Params: + action: gather diff --git a/benchmark/perf-tests/5000-rayjob/pytorch-mnist-rayjob.yaml b/benchmark/perf-tests/5000-rayjob/pytorch-mnist-rayjob.yaml new file mode 100644 index 00000000000..526ca1106ee --- /dev/null +++ b/benchmark/perf-tests/5000-rayjob/pytorch-mnist-rayjob.yaml @@ -0,0 +1,63 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: {{.Name}} + labels: + perf-test: rayjob-pytorch-mnist +spec: + shutdownAfterJobFinishes: true + entrypoint: python ray_train_pytorch_mnist.py + submitterPodTemplate: + spec: + restartPolicy: Never + containers: + - name: submitter-job + image: {{.Image}} + command: + - "sh" + - "-c" + args: + - | + #!/bin/sh + + ray job logs $RAY_JOB_SUBMISSION_ID --address=http://$RAY_DASHBOARD_ADDRESS --follow || \ + ray job submit --address=http://$RAY_DASHBOARD_ADDRESS --submission-id=$RAY_JOB_SUBMISSION_ID --runtime-env-json '{"env_vars":{"NUM_WORKERS":"2","CPUS_PER_WORKER":"1","OMP_NUM_THREADS":"1"}}' -- python ray_train_pytorch_mnist.py + resources: + requests: + cpu: "10m" + rayClusterSpec: + rayVersion: '2.9.3' + headGroupSpec: + rayStartParams: + disable-usage-stats: 'true' + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + requests: + cpu: "100m" + memory: "2Gi" + workerGroupSpecs: + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + groupName: worker-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + requests: + cpu: "100m" + memory: "2Gi" diff --git a/benchmark/perf-tests/5000-rayjob/ray-data-image-resize.yaml b/benchmark/perf-tests/5000-rayjob/ray-data-image-resize.yaml new file mode 100644 index 00000000000..8c9e2bdc3f3 --- /dev/null +++ b/benchmark/perf-tests/5000-rayjob/ray-data-image-resize.yaml 
@@ -0,0 +1,63 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: {{.Name}} + labels: + perf-test: ray-data-image-resize +spec: + shutdownAfterJobFinishes: true + entrypoint: python ray_data_image_resize.py + submitterPodTemplate: + spec: + restartPolicy: Never + containers: + - name: submitter-job + image: {{.Image}} + command: + - "sh" + - "-c" + args: + - | + #!/bin/sh + + ray job logs $RAY_JOB_SUBMISSION_ID --address=http://$RAY_DASHBOARD_ADDRESS --follow || \ + ray job submit --address=http://$RAY_DASHBOARD_ADDRESS --submission-id=$RAY_JOB_SUBMISSION_ID --runtime-env-json '{"env_vars":{"BUCKET_NAME":"ray-images","BUCKET_PREFIX":"images"}}' -- python ray_data_image_resize.py + resources: + requests: + cpu: "10m" + rayClusterSpec: + rayVersion: '2.9.3' + headGroupSpec: + rayStartParams: + disable-usage-stats: 'true' + template: + spec: + containers: + - name: ray-head + image: {{.Image}} + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + requests: + cpu: "100m" + memory: "2Gi" + workerGroupSpecs: + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + groupName: worker-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: {{.Image}} + resources: + requests: + cpu: "100m" + memory: "2Gi" diff --git a/benchmark/perf-tests/5000-rayjob/results/junit.xml b/benchmark/perf-tests/5000-rayjob/results/junit.xml new file mode 100644 index 00000000000..03f3a8ef745 --- /dev/null +++ b/benchmark/perf-tests/5000-rayjob/results/junit.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/benchmark/perf-tests/README.md b/benchmark/perf-tests/README.md new file mode 100644 index 00000000000..8a824bf7a97 --- /dev/null +++ b/benchmark/perf-tests/README.md @@ -0,0 +1,60 @@ +# KubeRay Performance Tests + +This directory contains a collection of large scale KubeRay tests using 
[clusterloader2](https://github.com/kubernetes/perf-tests/tree/master/clusterloader2). +clusterloader2 is a Kubernetes load testing tool by [SIG Scalability](https://github.com/kubernetes/community/blob/master/sig-scalability) used for Kubernetes scalability and performance testing. + +## Running clusterloader2 tests + +First, clone the perf-tests repository and compile the clusterloader2 binary: + +```sh +git clone git@github.com:kubernetes/perf-tests.git +cd perf-tests/clusterloader2 +go build -o clusterloader2 ./cmd +``` + +Run the following command to run clusterloader2 against one of the test folders. In this example we'll run the test configured in the [100-rayjob](./100-rayjob/) folder. + +```sh +clusterloader2 --provider=<provider> --kubeconfig=<path-to-kubeconfig> --testconfig=100-rayjob/config.yaml +``` + +## Tests & Results + +Each directory contains a test scenario and its clusterloader2 configuration. Each directory also contains a `results` subdirectory with the junit.xml files generated by clusterloader2 +for previously executed runs of the tests. + +The current list of tests is: +* [100 RayCluster test](./100-raycluster/) +* [100 RayJob test](./100-rayjob/) +* [1000 RayCluster test](./1000-raycluster/) +* [1000 RayJob test](./1000-rayjob/) +* [5000 RayCluster test](./5000-raycluster/) +* [5000 RayJob test](./5000-rayjob/) +* [10000 RayCluster test](./10000-raycluster/) +* [10000 RayJob test](./10000-rayjob/) + +All published results are based on tests that ran on GKE clusters using KubeRay v1.1.1. Each test directory contains a +`results/junit.xml` file containing the Cluster Loader 2 steps that were successfully completed. +To learn more about the benchmark measurements, see [Cluster Loader 2 Measurements](https://github.com/kubernetes/perf-tests/tree/master/clusterloader2#measurement). + +## Run a performance test with Kind + +You can test clusterloader2 configs using Kind. 
+ +First create a kind cluster: +```sh +kind create cluster --image=kindest/node:v1.27.3 +``` + +Install KubeRay; +```sh +helm install kuberay-operator kuberay/kuberay-operator --version 1.1.0 +``` + +Run a clusterloader2 test: +```sh +clusterloader2 --provider kind --kubeconfig ~/.kube/config --testconfig ./100-rayjob/config.yaml +``` + +Note: If you want to generate a number of RayJob custom resources other than 100, you need to make the following changes: (1) modify `replicasPerNamespace` in the "Creating RayJobs" step of the config.yaml file, and (2) adjust `expect_succeeded` in the `wait-for-rayjobs.sh` file. diff --git a/benchmark/perf-tests/common/image-preload-daemonset.yaml b/benchmark/perf-tests/common/image-preload-daemonset.yaml new file mode 100644 index 00000000000..35fc470948d --- /dev/null +++ b/benchmark/perf-tests/common/image-preload-daemonset.yaml @@ -0,0 +1,20 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: ray-image-preloader + labels: + k8s-app: ray-image-preloader +spec: + selector: + matchLabels: + k8s-app: ray-image-preloader + template: + metadata: + labels: + name: ray-image-preloader + k8s-app: ray-image-preloader + spec: + containers: + - image: {{.Image}} + name: ray-image-preloader + command: [ "sleep", "inf" ] diff --git a/benchmark/perf-tests/common/preload-image.sh b/benchmark/perf-tests/common/preload-image.sh new file mode 100644 index 00000000000..319283ebfa1 --- /dev/null +++ b/benchmark/perf-tests/common/preload-image.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +kubectl apply -f "${SCRIPT_DIR}"/image-preload-daemonset.yaml + +kubectl rollout status daemonset ray-image-preloader --timeout 25m diff --git a/benchmark/perf-tests/common/wait-for-rayclusters.sh b/benchmark/perf-tests/common/wait-for-rayclusters.sh new file mode 100644 index 00000000000..d8d48c7d9a8 --- /dev/null +++ b/benchmark/perf-tests/common/wait-for-rayclusters.sh @@ -0,0 
+1,17 @@ +#!/bin/bash + +expect_succeeded=$1 +echo "waiting for $expect_succeeded RayClusters to be completed successfully" + +while true; do + num_succeeded=$(kubectl get raycluster -l perf-test=ray-cluster -A -o jsonpath='{range .items[*]}{.metadata.name} {.status.state}{"\n"}' | grep -c ready) + echo "$num_succeeded RayClusters ready..." + + if [[ "$num_succeeded" == "$expect_succeeded" ]]; then + break; + fi + + sleep 5 +done + +echo "$num_succeeded RayClusters ready!" diff --git a/benchmark/perf-tests/common/wait-for-rayjobs.sh b/benchmark/perf-tests/common/wait-for-rayjobs.sh new file mode 100644 index 00000000000..eb71818b030 --- /dev/null +++ b/benchmark/perf-tests/common/wait-for-rayjobs.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +expect_succeeded=$1 +echo "waiting for $expect_succeeded PyTorch RayJobs to be completed successfully" + +while true; do + num_succeeded=$(kubectl get rayjob -A -l perf-test=rayjob-pytorch-mnist -o jsonpath='{range .items[*]}{.metadata.name} {.status.jobStatus}{"\n"}' | grep -c SUCCEEDED) + echo "$num_succeeded RayJobs completed..." + + if [[ "$num_succeeded" == "$expect_succeeded" ]]; then + break; + fi + + echo "printing RayJobs with Failed deployment status" + kubectl get rayjob -A -l perf-test=rayjob-pytorch-mnist -o jsonpath='{range .items[*]}{.metadata.name} {.status.jobDeploymentStatus}{"\n"}' | grep Failed + + echo "printing RayJobs with FAILED job status" + kubectl get rayjob -A -l perf-test=rayjob-pytorch-mnist -o jsonpath='{range .items[*]}{.metadata.name} {.status.jobStatus}{"\n"}' | grep FAILED + + sleep 30 +done + +echo "waiting for $expect_succeeded Ray Data RayJobs to be completed successfully" + +while true; do + num_succeeded=$(kubectl get rayjob -A -l perf-test=ray-data-image-resize -o jsonpath='{range .items[*]}{.metadata.name} {.status.jobStatus}{"\n"}' | grep -c SUCCEEDED) + echo "$num_succeeded RayJobs completed..." 
+ + if [[ "$num_succeeded" == "$expect_succeeded" ]]; then + break; + fi + + echo "printing RayJobs with Failed deployment status" + kubectl get rayjob -A -l perf-test=ray-data-image-resize -o jsonpath='{range .items[*]}{.metadata.name} {.status.jobDeploymentStatus}{"\n"}' | grep Failed + + echo "printing RayJobs with FAILED job status" + kubectl get rayjob -A -l perf-test=ray-data-image-resize -o jsonpath='{range .items[*]}{.metadata.name} {.status.jobStatus}{"\n"}' | grep FAILED + + sleep 30 +done + +echo "$num_succeeded RayJobs completed!" diff --git a/benchmark/perf-tests/images/ray-pytorch/Dockerfile b/benchmark/perf-tests/images/ray-pytorch/Dockerfile new file mode 100644 index 00000000000..60de9862704 --- /dev/null +++ b/benchmark/perf-tests/images/ray-pytorch/Dockerfile @@ -0,0 +1,5 @@ +FROM rayproject/ray:2.9.3 + +RUN pip install torch torchvision numpy +RUN wget https://raw.githubusercontent.com/ray-project/kuberay/master/ray-operator/config/samples/pytorch-mnist/ray_train_pytorch_mnist.py +RUN wget https://raw.githubusercontent.com/ray-project/kuberay/master/ray-operator/config/samples/ray-data-image-resize/ray_data_image_resize.py diff --git a/cli/Makefile b/cli/Makefile deleted file mode 100644 index db32a1017f2..00000000000 --- a/cli/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -OUTPUT_NAME := kuberay -BUILD_GOOS = $(shell go env GOOS) - -COMMIT := $(shell git rev-parse --short HEAD) -VERSION := v1.1.0 - -DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') -REPO="github.com/ray-project/kuberay" - -BUILD_FLAGS = -ldflags="-X '${REPO}/cli/pkg/cmd/version.Version=$(VERSION)' \ - -X '${REPO}/cli/pkg/cmd/version.gitCommit=$(COMMIT)' \ - -X '${REPO}/cli/pkg/cmd/version.buildDate=$(DATE)'" - -build: - go build $(BUILD_FLAGS) -o $(OUTPUT_NAME) main.go - -release: - GOOS=linux GOARCH=amd64 make build OUTPUT_NAME=_output/linux/amd64/$(OUTPUT_NAME) - GOOS=darwin GOARCH=amd64 make build OUTPUT_NAME=_output/darwin/amd64/$(OUTPUT_NAME) - zip 
_output/kuberay-$(VERSION)-linux-amd64.zip _output/linux/amd64/$(OUTPUT_NAME) - zip _output/kuberay-$(VERSION)-darwin-amd64.zip _output/darwin/amd64/$(OUTPUT_NAME) diff --git a/cli/README.md b/cli/README.md deleted file mode 100644 index 6baf03a5d92..00000000000 --- a/cli/README.md +++ /dev/null @@ -1,163 +0,0 @@ -# KubeRay CLI - -[![Build Status](https://github.com/ray-project/kuberay/workflows/Go-build-and-test/badge.svg)](https://github.com/ray-project/kuberay/actions) -[![Go Report Card](https://goreportcard.com/badge/github.com/ray-project/kuberay)](https://goreportcard.com/report/github.com/ray-project/kuberay) - -KubeRay CLI provides the ability to manage kuberay resources (ray clusters, compute templates etc) through command line interface. - -!!! note - - The KubeRay CLI is an optional interface backed by the KubeRay API server. - It provides a layer of simplified configuration for KubeRay resources. - - The KubeRay CLI is community-managed and is not officially endorsed by the - Ray maintainers. At this time, the only officially supported methods for - managing KubeRay resources are - - - Direct management of KubeRay custom resources via kubectl, kustomize, and Kubernetes language clients. - - Helm charts. - - KubeRay CLI maintainer contacts (GitHub handles): - @Jeffwan @scarlet25151 - -## Installation - -Please check [release page](https://github.com/ray-project/kuberay/releases) and download the binaries. - -## Prerequisites - -- Kuberay operator needs to be running. -- Kuberay apiserver needs to be running and accessible. - -## Development - -- Kuberay CLI uses [Cobra framework](https://github.com/spf13/cobra) for the CLI application. -- Kuberay CLI depends on kuberay apiserver to manage these resources by sending grpc requests to the kuberay apiserver. - -You can build kuberay binary following this way. 
- -``` -cd kuberay/cli -go build -o kuberay -a main.go -``` - -## Usage - -### Configure kuberay apiserver endpoint - -- Default kuberay apiserver endpoint: `127.0.0.1:8887`. -- If kuberay apiserver is not run locally, this must be set in order to manage ray clusters and ray compute templates. - -#### Read current kuberay apiserver endpoint - -`./kuberay config get endpoint` - -#### Reset kuberay apiserver endpoint to default (`127.0.0.1:8887`) - -`./kuberay config reset endpoint` - -#### Set kuberay apiserver endpoint - -`./kuberay config set endpoint ` - -### Manage Ray Clusters - -#### Create a Ray Cluster - -``` -Usage: -kuberay cluster create [flags] - -Flags: - --environment string environment of the cluster (valid values: DEV, TESTING, STAGING, PRODUCTION) (default "DEV") - --head-compute-template string compute template name for ray head - --head-image string ray head image - --head-service-type string ray head service type (ClusterIP, NodePort, LoadBalancer) (default "ClusterIP") - --name string name of the cluster - -n, --namespace string kubernetes namespace where the cluster will be - --user string SSO username of ray cluster creator - --version string version of the ray cluster (default "1.9.0") - --worker-compute-template string compute template name of worker in the first worker group - --worker-group-name string first worker group name - --worker-image string image of worker in the first worker group - --worker-replicas uint32 pod replicas of workers in the first worker group (default 1) -``` - -> Known Limitation: Currently only one worker compute template is supported during creation. 
- -#### Get a Ray Cluster - -`./kuberay cluster get -n ` - -#### List Ray Clusters - -`./kuberay cluster -n list` - -#### Delete a Ray Cluster - -`./kuberay cluster delete -n ` - -### Manage Ray Compute Template - -#### Create a Compute Template -``` -Usage: - kuberay template compute create [flags] - -Flags: - --cpu uint32 ray pod CPU (default 1) - --gpu uint32 ray head GPU - --gpu-accelerator string GPU Accelerator type - --memory uint32 ray pod memory in GB (default 1) - --name string name of the compute template - -n, --namespace string kubernetes namespace where the compute template will be stored - -``` - -#### Get a Ray Compute Template -`./kuberay template compute get -n ` - -#### List Ray Compute Templates -`./kuberay template compute list -n ` - -#### Delete a Ray Compute Template -`./kuberay template compute delete -n ` - -## End to end example - -Configure the endpoints - -``` -kubectl port-forward svc/kuberay-apiserver-service 8887:8887 -n ray-system -./kuberay config set endpoint 127.0.0.1:8887 -``` - -Create compute templates - -``` -./kuberay template compute create -n --cpu 2 --memory 4 --name "worker-template" -./kuberay template compute create -n --cpu 1 --memory 2 --name "head-template" -``` - -List compute templates created - -``` -./kuberay template compute list -``` - -Create the cluster - -``` -./kuberay cluster create -n --name test-cluster --user jiaxin.shan \ ---head-compute-template head-template \ ---head-image rayproject/ray:1.9.2 \ ---worker-group-name small-wg \ ---worker-compute-template worker-template \ ---worker-image rayproject/ray:1.9.2 -``` - -List the clusters - -``` -./kuberay cluster list -``` diff --git a/cli/cmd/root.go b/cli/cmd/root.go deleted file mode 100644 index c57057f8e3a..00000000000 --- a/cli/cmd/root.go +++ /dev/null @@ -1,162 +0,0 @@ -package cmd - -import ( - "fmt" - "os" - "strings" - "time" - - "k8s.io/klog/v2" - - "github.com/ray-project/kuberay/cli/pkg/cmd/cluster" - 
"github.com/ray-project/kuberay/cli/pkg/cmd/config" - "github.com/ray-project/kuberay/cli/pkg/cmd/info" - "github.com/ray-project/kuberay/cli/pkg/cmd/template" - "github.com/ray-project/kuberay/cli/pkg/cmd/version" - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/spf13/cobra" - - "github.com/fatih/color" - "github.com/kris-nova/logger" - lol "github.com/kris-nova/lolgopher" - "github.com/spf13/viper" -) - -var cfgFile string - -// rootCmd represents the base command when called without any subcommands -var rootCmd = &cobra.Command{ - Use: "kuberay", - Short: "kuberay offers life cycle management of ray clusters", -} - -// Execute adds all child commands to the root command and sets flags appropriately. -// This is called by main.main(). It only needs to happen once to the rootCmd. -func Execute() { - cobra.CheckErr(rootCmd.Execute()) -} - -func init() { - loggerLevel := rootCmd.PersistentFlags().IntP("log-level", "l", 3, "set log level, use 0 to silence, 4 for debugging and 5 for debugging with AWS debug logging") - colorValue := rootCmd.PersistentFlags().StringP("color", "C", "true", "toggle colorized logs (valid options: true, false, fabulous)") - cobra.OnInitialize(initConfig, func() { - initLogger(*loggerLevel, *colorValue) - }) - - // Here you will define your flags and configuration settings. - // Cobra supports persistent flags, which, if defined here, - // will be global for your application. - - rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.kuberay.yaml)") - - // Cobra also supports local flags, which will only run - // when this action is called directly. 
- rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") - - rootCmd.PersistentFlags().BoolP("help", "h", false, "help for this command") - rootCmd.AddCommand(info.NewCmdInfo()) - rootCmd.AddCommand(version.NewCmdVersion()) - rootCmd.AddCommand(cluster.NewCmdCluster()) - rootCmd.AddCommand(template.NewCmdTemplate()) - rootCmd.AddCommand(config.NewCmdConfig()) -} - -// initConfig reads in config file and ENV variables if set. -func initConfig() { - if cfgFile != "" { - // Use config file from the flag. - viper.SetConfigFile(cfgFile) - } else { - // Find home directory. - home, err := os.UserHomeDir() - cobra.CheckErr(err) - - // Search config in home directory with name ".cli" (without extension). - viper.AddConfigPath(home) - viper.SetConfigType("yaml") - viper.SetConfigName(".kuberay") - - viper.SetDefault("endpoint", fmt.Sprintf("%s:%s", cmdutil.DefaultRpcAddress, cmdutil.DefaultRpcPort)) - // Do not write to file system if it already exists - if err := viper.SafeWriteConfig(); err != nil { - if _, ok := err.(viper.ConfigFileAlreadyExistsError); !ok { - klog.Fatal(err) - } - } - } - - viper.AutomaticEnv() // read in environment variables that match - - if err := viper.ReadInConfig(); err != nil { - klog.Fatal(err) - } -} - -func initLogger(level int, colorValue string) { - logger.Layout = "2021-01-02 15:04:05" - - var bitwiseLevel int - switch level { - case 4: - bitwiseLevel = logger.LogDeprecated | logger.LogAlways | logger.LogSuccess | logger.LogCritical | logger.LogWarning | logger.LogInfo | logger.LogDebug - case 3: - bitwiseLevel = logger.LogDeprecated | logger.LogAlways | logger.LogSuccess | logger.LogCritical | logger.LogWarning | logger.LogInfo - case 2: - bitwiseLevel = logger.LogDeprecated | logger.LogAlways | logger.LogSuccess | logger.LogCritical | logger.LogWarning - case 1: - bitwiseLevel = logger.LogDeprecated | logger.LogAlways | logger.LogSuccess | logger.LogCritical - case 0: - bitwiseLevel = logger.LogDeprecated | 
logger.LogAlways | logger.LogSuccess - default: - bitwiseLevel = logger.LogDeprecated | logger.LogEverything - } - logger.BitwiseLevel = bitwiseLevel - - switch colorValue { - case "fabulous": - logger.Writer = lol.NewLolWriter() - case "true": - logger.Writer = color.Output - } - - logger.Line = func(prefix, format string, a ...interface{}) string { - if !strings.Contains(format, "\n") { - format = fmt.Sprintf("%s%s", format, "\n") - } - now := time.Now() - fNow := now.Format(logger.Layout) - var colorize func(format string, a ...interface{}) string - var icon string - switch prefix { - case logger.PreAlways: - icon = "✿" - colorize = color.GreenString - case logger.PreCritical: - icon = "✖" - colorize = color.RedString - case logger.PreInfo: - icon = "ℹ" - colorize = color.CyanString - case logger.PreDebug: - icon = "▶" - colorize = color.GreenString - case logger.PreSuccess: - icon = "✔" - colorize = color.CyanString - case logger.PreWarning: - icon = "!" - colorize = color.GreenString - default: - icon = "ℹ" - colorize = color.CyanString - } - - out := fmt.Sprintf(format, a...) 
- out = fmt.Sprintf("%s [%s] %s", fNow, icon, out) - if colorValue == "true" { - out = colorize(out) - } - - return out - } -} diff --git a/cli/go.mod b/cli/go.mod deleted file mode 100644 index df2129bc9b8..00000000000 --- a/cli/go.mod +++ /dev/null @@ -1,47 +0,0 @@ -module github.com/ray-project/kuberay/cli - -go 1.20 - -require ( - github.com/fatih/color v1.13.0 - github.com/kris-nova/logger v0.2.2 - github.com/kris-nova/lolgopher v0.0.0-20210112022122-73f0047e8b65 - github.com/olekukonko/tablewriter v0.0.5 - github.com/ray-project/kuberay/proto v0.0.0-20220119062608-4054f1bf1765 - github.com/spf13/cobra v1.3.0 - github.com/spf13/viper v1.10.1 - google.golang.org/grpc v1.59.0 - k8s.io/klog/v2 v2.20.0 -) - -require ( - github.com/fsnotify/fsnotify v1.5.1 // indirect - github.com/go-logr/logr v1.0.0 // indirect - github.com/golang/protobuf v1.5.3 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.6.0 // indirect - github.com/hashicorp/hcl v1.0.0 // indirect - github.com/inconshreveable/mousetrap v1.0.0 // indirect - github.com/magiconair/properties v1.8.5 // indirect - github.com/mattn/go-colorable v0.1.12 // indirect - github.com/mattn/go-isatty v0.0.14 // indirect - github.com/mattn/go-runewidth v0.0.13 // indirect - github.com/mitchellh/mapstructure v1.4.3 // indirect - github.com/pelletier/go-toml v1.9.4 // indirect - github.com/rivo/uniseg v0.2.0 // indirect - github.com/spf13/afero v1.6.0 // indirect - github.com/spf13/cast v1.4.1 // indirect - github.com/spf13/jwalterweatherman v1.1.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect - github.com/subosito/gotenv v1.2.0 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/text v0.14.0 // indirect - google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // 
indirect - google.golang.org/protobuf v1.33.0 // indirect - gopkg.in/ini.v1 v1.66.2 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect -) - -replace github.com/ray-project/kuberay/proto => ../proto diff --git a/cli/go.sum b/cli/go.sum deleted file mode 100644 index a23ca1365e9..00000000000 --- a/cli/go.sum +++ /dev/null @@ -1,821 +0,0 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= -cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= -cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= -cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= -cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= -cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= -cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= -cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= -cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= -cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= -cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= -cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= -cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= -cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= -cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= -cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= -cloud.google.com/go v0.81.0/go.mod 
h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= -cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY= -cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM= -cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY= -cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ= -cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= -cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= -cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc= -cloud.google.com/go v0.98.0/go.mod h1:ua6Ush4NALrHk5QXDWnjvZHN93OuF0HfuEPq9I1X0cM= -cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA= -cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= -cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= -cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= -cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= -cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= -cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/firestore v1.6.1/go.mod h1:asNXNOzBdyVQmEU+ggO8UPodTkEVFW5Qx+rwHnAz+EY= -cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= -cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= -cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= -cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= 
-cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= -cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= -cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= -cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= -cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= -dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= -github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= -github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= -github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= 
-github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= -github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= -github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/udpa/go 
v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= -github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211130200136-a8f946100490/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= -github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod 
h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= -github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= -github.com/envoyproxy/go-control-plane v0.10.1/go.mod h1:AY7fTTXNdv/aJ2O5jwpxAPOWUZ7hQAEvzN5Pf27BkQQ= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/envoyproxy/protoc-gen-validate v0.6.2/go.mod h1:2t7qjJNvHPx8IjnBOzl9E9/baC+qXE/TeeyBRzgJDws= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= -github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= -github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= -github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= -github.com/fsnotify/fsnotify v1.5.1 h1:mZcQUHVQUQWoPXXtuf9yuEXKudkV2sx1E06UadKWpgI= -github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod 
h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logr/logr v1.0.0 h1:kH951GinvFVaQgy/ki/B3YYmQtRpExGigSJg6O8z5jo= -github.com/go-logr/logr v1.0.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= -github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= -github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= -github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= -github.com/golang/mock v1.5.0/go.mod 
h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= -github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/snappy v0.0.3/go.mod 
h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= -github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= -github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= -github.com/google/martian/v3 v3.2.1/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk= -github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof 
v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= -github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= -github.com/googleapis/gax-go/v2 v2.1.1/go.mod 
h1:hddJymUZASv3XPyGkUpKj8pPO47Rmb0eJc8R6ouapiM= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.6.0 h1:rgxjzoDmDXw5q8HONgyHhBas4to0/XWRo/gPpJhsUNQ= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.6.0/go.mod h1:qrJPVzv9YlhsrxJc3P/Q85nr0w1lIRikTl4JlhdDH5w= -github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= -github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= -github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= -github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= -github.com/hashicorp/go-hclog v1.0.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= -github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= -github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= -github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= -github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= -github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= -github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= 
-github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= -github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= -github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY= -github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= -github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= -github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= -github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= -github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= 
-github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= -github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= -github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/kris-nova/logger v0.2.2 h1:qdWg2fNr4Bni4obkgehwOSbCoxaX+wDGGrzQ1T2mA20= -github.com/kris-nova/logger v0.2.2/go.mod h1:uOTzfb9ssx0XYb3UpeAjKsys8KByjD12OMN4szmym4w= -github.com/kris-nova/lolgopher v0.0.0-20210112022122-73f0047e8b65 h1:g+tnN/LHRq6LaUfeREPluv9g6jOtN3P1hQwTHofQTSw= -github.com/kris-nova/lolgopher v0.0.0-20210112022122-73f0047e8b65/go.mod h1:V0HF/ZBlN86HqewcDC/cVxMmYDiRukWjSrgKLUAn9Js= -github.com/lyft/protoc-gen-star v0.5.3/go.mod h1:V0xaHgaf5oCCqmcxYcWiDfTiKsZsRc87/1qhoTACD8w= 
-github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaWak/Gls= -github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= -github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= -github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= -github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= -github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= -github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= -github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= -github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= -github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= -github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= -github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/miekg/dns v1.1.26/go.mod 
h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= -github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= -github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= -github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= -github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.4.3 h1:OVowDSCllw/YjdLkam3/sm7wEtOy59d8ndGgCcyj8cs= -github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= -github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= -github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM= 
-github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= -github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= -github.com/rivo/uniseg v0.2.0 
h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= -github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig= -github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4= -github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY= -github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= -github.com/spf13/cast v1.4.1 h1:s0hze+J0196ZfEMTs80N7UlFt0BDuQ7Q+JDnHiMWKdA= -github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cobra v1.3.0 h1:R7cSvGu+Vv+qX0gW5R/85dx2kmmJT5z5NM8ifdYjdn0= -github.com/spf13/cobra v1.3.0/go.mod h1:BrRVncBjOJa/eUcVVm9CE+oC6as8k+VYr4NY7WCi9V4= -github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= -github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 
-github.com/spf13/viper v1.10.0/go.mod h1:SoyBPwAtKDzypXNDFKN5kzH7ppppbGZtls1UpIy5AsM= -github.com/spf13/viper v1.10.1 h1:nuJZuYpG7gTj/XqiUwg8bA0cp1+M2mC3J4g5luUYBKk= -github.com/spf13/viper v1.10.1/go.mod h1:IGlFPqhNAPKRxohIzWpI5QEy4kuI7tcl5WvR+8qy1rU= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= -github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= -github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= -github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= -go.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= -go.etcd.io/etcd/client/v2 
v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs= -go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= -go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= -go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= -go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto 
v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= -golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= -golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= -golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= -golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod 
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= -golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= -golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 
-golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net 
v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= -golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod 
h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
-golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys 
v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210816183151-1e6c022a8912/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod 
h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= -golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= -golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= -golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= 
-golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= -google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= -google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= -google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= -google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= 
-google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= -google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= -google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= -google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= -google.golang.org/api v0.47.0/go.mod h1:Wbvgpq1HddcWVtzsVLyfLp8lDg6AA241LmgIL59tHXo= -google.golang.org/api v0.48.0/go.mod h1:71Pr1vy+TAZRPkPs/xlCf5SsU8WjuAWv1Pfjbtukyy4= -google.golang.org/api v0.50.0/go.mod h1:4bNT5pAuq5ji4SRZm+5QIkjny9JAyVD/3gaSihNefaw= -google.golang.org/api v0.51.0/go.mod h1:t4HdrdoNgyN5cbEfm7Lum0lcLDLiise1F8qDKX00sOU= -google.golang.org/api v0.54.0/go.mod h1:7C4bFFOvVDGXjfDTAsgGwDgAxRDeQ4X8NvUedIt6z3k= -google.golang.org/api v0.55.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= -google.golang.org/api v0.56.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= -google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdrMgI= -google.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUbuZU= -google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I= -google.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod 
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= -google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= -google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto 
v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= -google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= 
-google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= -google.golang.org/genproto v0.0.0-20210513213006-bf773b8c8384/go.mod h1:P3QM42oQyzQSnHPnZ/vqoCdDmzH28fzWByN9asMeM8A= -google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= -google.golang.org/genproto v0.0.0-20210604141403-392c879c8b08/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= -google.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= -google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= -google.golang.org/genproto v0.0.0-20210713002101-d411969a0d9a/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= -google.golang.org/genproto v0.0.0-20210716133855-ce7ef5c701ea/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= -google.golang.org/genproto v0.0.0-20210728212813-7823e685a01f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= -google.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= -google.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w= -google.golang.org/genproto v0.0.0-20210821163610-241b8fcbd6c8/go.mod 
h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210903162649-d08c68adba83/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210909211513-a8c4777a87af/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211008145708-270636b82663/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211028162531-8db9c33dc351/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211129164237-f09f9a12af12/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211203200212-54befc351ae9/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211206160659-862468c7d6e0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d h1:VBu5YqKPv6XiJ199exd8Br+Aetz+o08F+PLMnwJQHAY= -google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= -google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d h1:DoPTO70H+bcDXcd39vOqb2viZxgqeBeSGtZ55yZU4/Q= -google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk= -google.golang.org/genproto/googleapis/rpc 
v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= -google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= -google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= -google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= -google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.37.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= -google.golang.org/grpc v1.37.1/go.mod 
h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= -google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= -google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= -google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= -google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= -google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/ini.v1 v1.66.2 h1:XfR1dOYubytKy4Shzc2LHrrGhU0lDCfDGG1yLPmpgsI= -gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= -gopkg.in/yaml.v3 
v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/klog/v2 v2.20.0 h1:tlyxlSvd63k7axjhuchckaRJm+a92z5GSOrTOQY5sHw= -k8s.io/klog/v2 v2.20.0/go.mod h1:Gm8eSIfQN6457haJuPaMxZw4wyP5k+ykPFlrhQDvhvw= -rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= -rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/cli/main.go b/cli/main.go deleted file mode 100644 index 55f995f2c82..00000000000 --- a/cli/main.go +++ /dev/null @@ -1,7 +0,0 @@ -package main - -import "github.com/ray-project/kuberay/cli/cmd" - -func main() { - cmd.Execute() -} diff --git a/cli/pkg/cmd/cluster/cluster.go b/cli/pkg/cmd/cluster/cluster.go deleted file mode 100644 index e1e82bc1a71..00000000000 --- a/cli/pkg/cmd/cluster/cluster.go +++ /dev/null @@ -1,23 +0,0 @@ -package cluster - -import ( - "github.com/spf13/cobra" -) - -func NewCmdCluster() *cobra.Command { - cmd := &cobra.Command{ - Use: "cluster ", - Short: "Manage ray cluster", - Long: ``, - Annotations: map[string]string{ - "IsCore": "true", - }, - } - - cmd.AddCommand(newCmdGet()) - cmd.AddCommand(newCmdList()) - cmd.AddCommand(newCmdCreate()) - 
cmd.AddCommand(newCmdDelete()) - - return cmd -} diff --git a/cli/pkg/cmd/cluster/create.go b/cli/pkg/cmd/cluster/create.go deleted file mode 100644 index 8a35560875b..00000000000 --- a/cli/pkg/cmd/cluster/create.go +++ /dev/null @@ -1,156 +0,0 @@ -package cluster - -import ( - "context" - "fmt" - "log" - "os" - "time" - - "k8s.io/klog/v2" - - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/ray-project/kuberay/proto/go_client" - "github.com/spf13/cobra" -) - -type CreateOptions struct { - name string - namespace string - environment string - version string - user string - headComputeTemplate string - headImage string - headServiceType string - workerGroupName string - workerComputeTemplate string - workerImage string - workerReplicas uint32 -} - -func newCmdCreate() *cobra.Command { - opts := CreateOptions{} - - cmd := &cobra.Command{ - Use: "create", - Short: "Create a ray cluster", - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - return createCluster(opts) - }, - } - - cmd.Flags().StringVarP(&opts.namespace, "namespace", "n", "", "kubernetes namespace where the cluster will be") - cmd.Flags().StringVar(&opts.name, "name", "", "name of the cluster") - cmd.Flags().StringVar(&opts.environment, "environment", "DEV", - "environment of the cluster (valid values: DEV, TESTING, STAGING, PRODUCTION)") - cmd.Flags().StringVar(&opts.version, "version", "1.9.0", "version of the ray cluster") - cmd.Flags().StringVar(&opts.user, "user", "", "SSO username of ray cluster creator") - cmd.Flags().StringVar(&opts.headComputeTemplate, "head-compute-template", "", "compute template name for ray head") - cmd.Flags().StringVar(&opts.headImage, "head-image", "", "ray head image") - cmd.Flags().StringVar(&opts.headServiceType, "head-service-type", "ClusterIP", "ray head service type (ClusterIP, NodePort, LoadBalancer)") - cmd.Flags().StringVar(&opts.workerGroupName, "worker-group-name", "", "first worker group name") - 
cmd.Flags().StringVar(&opts.workerComputeTemplate, "worker-compute-template", "", "compute template name of worker in the first worker group") - cmd.Flags().StringVar(&opts.workerImage, "worker-image", "", "image of worker in the first worker group") - cmd.Flags().Uint32Var(&opts.workerReplicas, "worker-replicas", 1, "pod replicas of workers in the first worker group") - if err := cmd.MarkFlagRequired("namespace"); err != nil { - klog.Warning(err) - } - if err := cmd.MarkFlagRequired("name"); err != nil { - klog.Warning(err) - } - if err := cmd.MarkFlagRequired("user"); err != nil { - klog.Warning(err) - } - if err := cmd.MarkFlagRequired("head-image"); err != nil { - klog.Warning(err) - } - if err := cmd.MarkFlagRequired("head-compute-template"); err != nil { - klog.Warning(err) - } - if err := cmd.MarkFlagRequired("worker-image"); err != nil { - klog.Warning(err) - } - if err := cmd.MarkFlagRequired("worker-compute-template"); err != nil { - klog.Warning(err) - } - if err := cmd.MarkFlagRequired("worker-group-name"); err != nil { - klog.Warning(err) - } - - // handle user from auth and inject it. - - return cmd -} - -func createCluster(opts CreateOptions) error { - conn, err := cmdutil.GetGrpcConn() - if err != nil { - return err - } - defer conn.Close() - - // build gRPC client - client := go_client.NewClusterServiceClient(conn) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - envInt, ok := go_client.Cluster_Environment_value[opts.environment] - if !ok { - fmt.Fprintf(os.Stderr, "error: Invalid environment value. 
Valid values: DEV, TESTING, STAGING, PRODUCTION\n") - os.Exit(1) - } - - headStartParams := make(map[string]string) - headStartParams["port"] = "6379" - headStartParams["dashboard-host"] = "0.0.0.0" - headStartParams["node-ip-address"] = "$MY_POD_IP" - headStartParams["redis-password"] = "LetMeInRay" - - headSpec := &go_client.HeadGroupSpec{ - ComputeTemplate: opts.headComputeTemplate, - Image: opts.headImage, - ServiceType: opts.headServiceType, - RayStartParams: headStartParams, - } - - workerStartParams := make(map[string]string) - workerStartParams["node-ip-address"] = "$MY_POD_IP" - workerStartParams["redis-password"] = "LetMeInRay" - - var workerGroupSpecs []*go_client.WorkerGroupSpec - spec := &go_client.WorkerGroupSpec{ - GroupName: opts.workerGroupName, - ComputeTemplate: opts.workerComputeTemplate, - Image: opts.workerImage, - Replicas: int32(opts.workerReplicas), - MinReplicas: int32(opts.workerReplicas), - MaxReplicas: int32(opts.workerReplicas), - RayStartParams: workerStartParams, - } - workerGroupSpecs = append(workerGroupSpecs, spec) - - cluster := &go_client.Cluster{ - Name: opts.name, - Namespace: opts.namespace, - User: opts.user, - Version: opts.version, - Environment: *go_client.Cluster_Environment(envInt).Enum(), - ClusterSpec: &go_client.ClusterSpec{ - HeadGroupSpec: headSpec, - WorkerGroupSpec: workerGroupSpecs, - }, - } - - r, err := client.CreateCluster(ctx, &go_client.CreateClusterRequest{ - Namespace: opts.namespace, - Cluster: cluster, - }) - if err != nil { - log.Fatalf("could not create cluster %v", err) - } - - log.Printf("cluster %v is created", r.Name) - return nil -} diff --git a/cli/pkg/cmd/cluster/delete.go b/cli/pkg/cmd/cluster/delete.go deleted file mode 100644 index e9fc387cd63..00000000000 --- a/cli/pkg/cmd/cluster/delete.go +++ /dev/null @@ -1,62 +0,0 @@ -package cluster - -import ( - "context" - "log" - "time" - - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/ray-project/kuberay/proto/go_client" - 
"github.com/spf13/cobra" - "k8s.io/klog/v2" -) - -type DeleteOptions struct { - namespace string -} - -func newCmdDelete() *cobra.Command { - opts := DeleteOptions{} - - cmd := &cobra.Command{ - Use: "delete ", - Short: "Delete a ray cluster by name", - Args: cobra.MaximumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - return deleteCluster(args[0], opts) - }, - } - - cmd.Flags().StringVarP(&opts.namespace, "namespace", "n", "", - "kubernetes namespace where the cluster is provisioned") - if err := cmd.MarkFlagRequired("namespace"); err != nil { - klog.Warning(err) - } - - return cmd -} - -func deleteCluster(name string, opts DeleteOptions) error { - // Get gRPC connection - conn, err := cmdutil.GetGrpcConn() - if err != nil { - return err - } - defer conn.Close() - - // build gRPC client - client := go_client.NewClusterServiceClient(conn) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - request := &go_client.DeleteClusterRequest{ - Name: name, - Namespace: opts.namespace, - } - if _, err := client.DeleteCluster(ctx, request); err != nil { - log.Fatalf("could not delete cluster %v", err) - } - - log.Printf("cluster %v has been deleted", name) - return nil -} diff --git a/cli/pkg/cmd/cluster/get.go b/cli/pkg/cmd/cluster/get.go deleted file mode 100644 index a5433b3a2e5..00000000000 --- a/cli/pkg/cmd/cluster/get.go +++ /dev/null @@ -1,73 +0,0 @@ -package cluster - -import ( - "context" - "log" - "os" - "time" - - "github.com/olekukonko/tablewriter" - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/ray-project/kuberay/proto/go_client" - "github.com/spf13/cobra" - "k8s.io/klog/v2" -) - -type GetOptions struct { - namespace string -} - -func newCmdGet() *cobra.Command { - opts := GetOptions{} - - cmd := &cobra.Command{ - Use: "get ", - Short: "Get a ray cluster by name", - Args: cobra.MaximumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - return getCluster(args[0], opts) 
- }, - } - - cmd.Flags().StringVarP(&opts.namespace, "namespace", "n", "", - "kubernetes namespace where the cluster is provisioned") - if err := cmd.MarkFlagRequired("namespace"); err != nil { - klog.Warning(err) - } - - return cmd -} - -func getCluster(name string, opts GetOptions) error { - // Get gRPC connection - conn, err := cmdutil.GetGrpcConn() - if err != nil { - return err - } - defer conn.Close() - - // build gRPC client - client := go_client.NewClusterServiceClient(conn) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - r, err := client.GetCluster(ctx, &go_client.GetClusterRequest{ - Name: name, - Namespace: opts.namespace, - }) - if err != nil { - log.Fatalf("could not get cluster %v: %v", name, err) - } - row, nWorkGroups := convertClusterToString(r) - header := []string{"Name", "User", "Namespace", "Created At", "Version", "Environment", "Head Image", "Head Compute Template", "Head Service Type"} - for i := 0; i < nWorkGroups; i++ { - header = append(header, "Worker Group Name", "Worker Image", "Worker ComputeTemplate") - } - - table := tablewriter.NewWriter(os.Stdout) - table.SetHeader(header) - table.Append(row) - table.Render() - - return nil -} diff --git a/cli/pkg/cmd/cluster/list.go b/cli/pkg/cmd/cluster/list.go deleted file mode 100644 index c720b35edba..00000000000 --- a/cli/pkg/cmd/cluster/list.go +++ /dev/null @@ -1,110 +0,0 @@ -package cluster - -import ( - "context" - "log" - "os" - "time" - - "github.com/olekukonko/tablewriter" - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/ray-project/kuberay/proto/go_client" - "github.com/spf13/cobra" -) - -type ListOptions struct { - namespace string -} - -func newCmdList() *cobra.Command { - opts := ListOptions{} - - cmd := &cobra.Command{ - Use: "list", - Short: "List all ray clusters", - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - return listCluster(opts) - }, - } - - 
cmd.Flags().StringVarP(&opts.namespace, "namespace", "n", "", - "kubernetes namespace where the cluster is provisioned") - - return cmd -} - -func listCluster(opts ListOptions) error { - // Get gRPC connection - conn, err := cmdutil.GetGrpcConn() - if err != nil { - return err - } - defer conn.Close() - - // build gRPC client - client := go_client.NewClusterServiceClient(conn) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - var clusters []*go_client.Cluster - if len(opts.namespace) == 0 { - r, err := client.ListAllClusters(ctx, &go_client.ListAllClustersRequest{}) - if err != nil { - log.Fatalf("could not list all clusters %v", err) - } - clusters = r.GetClusters() - } else { - r, err := client.ListCluster(ctx, &go_client.ListClustersRequest{ - Namespace: opts.namespace, - }) - if err != nil { - log.Fatalf("could not list clusters %v", err) - } - clusters = r.GetClusters() - } - rows, maxNumberWorkerGroups := convertClustersToStrings(clusters) - header := []string{"Name", "Namespace", "User", "Version", "Environment", "Created At", "Head Image", "Head Compute Template", "Head Service Type"} - for i := 0; i < maxNumberWorkerGroups; i++ { - header = append(header, "Worker Group Name", "Worker Image", "Worker ComputeTemplate") - } - - table := tablewriter.NewWriter(os.Stdout) - table.SetHeader(header) - table.AppendBulk(rows) - table.Render() - - return nil -} - -func convertClustersToStrings(clusters []*go_client.Cluster) ([][]string, int) { - var data [][]string - - // max number of worker groups among all clusters. This will decide how wide the table is. 
- maxNumberWorkerGroups := 0 - for _, r := range clusters { - row, nWorkerGroups := convertClusterToString(r) - data = append(data, row) - - if nWorkerGroups > maxNumberWorkerGroups { - maxNumberWorkerGroups = nWorkerGroups - } - } - - return data, maxNumberWorkerGroups -} - -func convertClusterToString(r *go_client.Cluster) ([]string, int) { - headResource := r.GetClusterSpec().GetHeadGroupSpec() - workerGroups := r.GetClusterSpec().GetWorkerGroupSpec() - line := []string{ - r.GetName(), r.GetNamespace(), r.GetUser(), r.GetVersion(), r.GetEnvironment().String(), r.GetCreatedAt().AsTime().String(), - headResource.GetImage(), headResource.GetComputeTemplate(), headResource.GetServiceType(), - } - nWorkGroups := len(workerGroups) - - for _, workerGroup := range workerGroups { - line = append(line, workerGroup.GetGroupName(), workerGroup.GetImage(), workerGroup.GetComputeTemplate()) - } - return line, nWorkGroups -} diff --git a/cli/pkg/cmd/config/config.go b/cli/pkg/cmd/config/config.go deleted file mode 100644 index 7ec9e1d52e3..00000000000 --- a/cli/pkg/cmd/config/config.go +++ /dev/null @@ -1,20 +0,0 @@ -package config - -import "github.com/spf13/cobra" - -func NewCmdConfig() *cobra.Command { - configCmd := &cobra.Command{ - Use: "config ", - Short: "Kuberay Config Management", - Long: ``, - Annotations: map[string]string{ - "IsCore": "false", - }, - } - - configCmd.AddCommand(NewCmdSet()) - configCmd.AddCommand(NewCmdReset()) - configCmd.AddCommand(NewCmdGet()) - - return configCmd -} diff --git a/cli/pkg/cmd/config/get.go b/cli/pkg/cmd/config/get.go deleted file mode 100644 index 8658a7cde35..00000000000 --- a/cli/pkg/cmd/config/get.go +++ /dev/null @@ -1,24 +0,0 @@ -package config - -import ( - "fmt" - - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/spf13/cobra" -) - -func NewCmdGet() *cobra.Command { - getCmd := &cobra.Command{ - Use: "get", - Short: "Get configuration in kuberay with key.", - Long: `Get configuration in kuberay. 
Use argument as the key`, - Args: cobra.ExactArgs(1), - Run: func(cmd *cobra.Command, args []string) { - key := args[0] - // key, _ := cmd.Flags().GetString("key") - val := cmdutil.GetVal(key) - fmt.Printf("%s\n", val) - }, - } - return getCmd -} diff --git a/cli/pkg/cmd/config/reset.go b/cli/pkg/cmd/config/reset.go deleted file mode 100644 index 62c7d850aed..00000000000 --- a/cli/pkg/cmd/config/reset.go +++ /dev/null @@ -1,18 +0,0 @@ -package config - -import ( - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/spf13/cobra" -) - -func NewCmdReset() *cobra.Command { - cmd := &cobra.Command{ - Use: "reset", - Short: "Reset configuration in kuberay to default.", - Long: ``, - Run: func(cmd *cobra.Command, args []string) { - cmdutil.Reset() - }, - } - return cmd -} diff --git a/cli/pkg/cmd/config/set.go b/cli/pkg/cmd/config/set.go deleted file mode 100644 index 0c6327beb5e..00000000000 --- a/cli/pkg/cmd/config/set.go +++ /dev/null @@ -1,23 +0,0 @@ -package config - -import ( - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/spf13/cobra" -) - -func NewCmdSet() *cobra.Command { - setCmd := &cobra.Command{ - Use: "set", - Short: "Set configuration in kuberay.", - Long: `Set configuration in kuberay. 
Use the first argument as key and the second argument as value`, - Args: cobra.ExactArgs(2), - Run: func(cmd *cobra.Command, args []string) { - key := args[0] - value := args[1] - // key, _ := cmd.Flags().GetString("key") - // value, _ := cmd.Flags().GetString("value") - cmdutil.SetKeyValPair(key, value) - }, - } - return setCmd -} diff --git a/cli/pkg/cmd/info/info.go b/cli/pkg/cmd/info/info.go deleted file mode 100644 index 284176fab0e..00000000000 --- a/cli/pkg/cmd/info/info.go +++ /dev/null @@ -1,54 +0,0 @@ -package info - -import ( - "encoding/json" - "fmt" - "runtime" - - "github.com/ray-project/kuberay/cli/pkg/cmd/version" - "github.com/spf13/cobra" -) - -func NewCmdInfo() *cobra.Command { - cmd := &cobra.Command{ - Use: "info", - Short: "Output the version of kuberay, and OS info", - Long: ``, - Run: func(cmd *cobra.Command, args []string) { - info := GetInfo() - fmt.Printf("KubeRay version: %s\n", info.KubeRayVersion) - fmt.Printf("OS: %s\n", info.OS) - }, - } - - return cmd -} - -// Info holds versions info -type Info struct { - KubeRayVersion string - OS string -} - -// GetInfo returns versions info -func GetInfo() Info { - return Info{ - KubeRayVersion: getKubeRayVersion(), - OS: runtime.GOOS, - } -} - -// getKubeRayVersion returns the kuberay version -func getKubeRayVersion() string { - return version.GetVersion() -} - -// String return info as JSON -func String() string { - data, err := json.Marshal(GetInfo()) - if err != nil { - return fmt.Sprintf("failed to marshal info into json: %q", err) - } - - return string(data) -} diff --git a/cli/pkg/cmd/template/compute/compute.go b/cli/pkg/cmd/template/compute/compute.go deleted file mode 100644 index fff4af99139..00000000000 --- a/cli/pkg/cmd/template/compute/compute.go +++ /dev/null @@ -1,20 +0,0 @@ -package compute - -import "github.com/spf13/cobra" - -func NewCmdComputeTemplate() *cobra.Command { - cmd := &cobra.Command{ - Use: "compute ", - Short: "Manage compute template", - Annotations: 
map[string]string{ - "IsCore": "true", - }, - } - - cmd.AddCommand(newCmdGet()) - cmd.AddCommand(newCmdList()) - cmd.AddCommand(newCmdCreate()) - cmd.AddCommand(newCmdDelete()) - - return cmd -} diff --git a/cli/pkg/cmd/template/compute/create.go b/cli/pkg/cmd/template/compute/create.go deleted file mode 100644 index fb73e3912db..00000000000 --- a/cli/pkg/cmd/template/compute/create.go +++ /dev/null @@ -1,82 +0,0 @@ -package compute - -import ( - "context" - "log" - "time" - - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/ray-project/kuberay/proto/go_client" - "github.com/spf13/cobra" - "k8s.io/klog/v2" -) - -type CreateOptions struct { - name string - namespace string - cpu uint32 - memory uint32 - gpu uint32 - gpuAccelerator string -} - -func newCmdCreate() *cobra.Command { - opts := CreateOptions{} - - cmd := &cobra.Command{ - Use: "create", - Short: "Create a compute template", - Long: "Currently only one worker group is supported in CLI", - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - return createComputeTemplate(opts) - }, - } - cmd.Flags().StringVarP(&opts.namespace, "namespace", "n", "", "kubernetes namespace where the compute template will be stored") - cmd.Flags().StringVar(&opts.name, "name", "", "name of the compute template") - cmd.Flags().Uint32Var(&opts.cpu, "cpu", 1, "ray pod CPU") - cmd.Flags().Uint32Var(&opts.memory, "memory", 1, "ray pod memory in GB") - cmd.Flags().Uint32Var(&opts.gpu, "gpu", 0, "ray head GPU") - cmd.Flags().StringVar(&opts.gpuAccelerator, "gpu-accelerator", "", "GPU Accelerator type") - if err := cmd.MarkFlagRequired("namespace"); err != nil { - klog.Warning(err) - } - if err := cmd.MarkFlagRequired("name"); err != nil { - klog.Warning(err) - } - - return cmd -} - -func createComputeTemplate(opts CreateOptions) error { - conn, err := cmdutil.GetGrpcConn() - if err != nil { - return err - } - defer conn.Close() - - // build gRPC client - client := 
go_client.NewComputeTemplateServiceClient(conn) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - computeTemplate := &go_client.ComputeTemplate{ - Name: opts.name, - Namespace: opts.namespace, - Cpu: opts.cpu, - Memory: opts.memory, - Gpu: opts.gpu, - GpuAccelerator: opts.gpuAccelerator, - } - - r, err := client.CreateComputeTemplate(ctx, &go_client.CreateComputeTemplateRequest{ - Namespace: opts.namespace, - ComputeTemplate: computeTemplate, - }) - if err != nil { - log.Fatalf("could not create compute template %v", err) - } - - log.Printf("compute template %v has been created in %v", r.Name, r.Namespace) - return nil -} diff --git a/cli/pkg/cmd/template/compute/delete.go b/cli/pkg/cmd/template/compute/delete.go deleted file mode 100644 index 7778194e9f8..00000000000 --- a/cli/pkg/cmd/template/compute/delete.go +++ /dev/null @@ -1,62 +0,0 @@ -package compute - -import ( - "context" - "log" - "time" - - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/ray-project/kuberay/proto/go_client" - "github.com/spf13/cobra" - "k8s.io/klog/v2" -) - -type DeleteOptions struct { - namespace string -} - -func newCmdDelete() *cobra.Command { - opts := DeleteOptions{} - - cmd := &cobra.Command{ - Use: "delete ", - Short: "Delete a compute template by name", - Args: cobra.MaximumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - return deleteComputeTemplate(args[0], opts) - }, - } - - cmd.Flags().StringVarP(&opts.namespace, "namespace", "n", "", - "kubernetes namespace where the compute template is stored") - if err := cmd.MarkFlagRequired("namespace"); err != nil { - klog.Warning(err) - } - - return cmd -} - -func deleteComputeTemplate(name string, opts DeleteOptions) error { - // Get gRPC connection - conn, err := cmdutil.GetGrpcConn() - if err != nil { - return err - } - defer conn.Close() - - // build gRPC client - client := go_client.NewComputeTemplateServiceClient(conn) - ctx, cancel := 
context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - request := &go_client.DeleteComputeTemplateRequest{ - Name: name, - Namespace: opts.namespace, - } - if _, err := client.DeleteComputeTemplate(ctx, request); err != nil { - log.Fatalf("could not delete compute template %v", err) - } - - log.Printf("compute template %v has been deleted", name) - return nil -} diff --git a/cli/pkg/cmd/template/compute/get.go b/cli/pkg/cmd/template/compute/get.go deleted file mode 100644 index 62007a62e3a..00000000000 --- a/cli/pkg/cmd/template/compute/get.go +++ /dev/null @@ -1,72 +0,0 @@ -package compute - -import ( - "context" - "log" - "os" - "time" - - "github.com/olekukonko/tablewriter" - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/ray-project/kuberay/proto/go_client" - "github.com/spf13/cobra" - "k8s.io/klog/v2" -) - -type GetOptions struct { - namespace string -} - -func newCmdGet() *cobra.Command { - opts := GetOptions{} - - cmd := &cobra.Command{ - Use: "get ", - Short: "Get a compute template by name", - Args: cobra.MaximumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - return getComputeTemplate(args[0], opts) - }, - } - - cmd.Flags().StringVarP(&opts.namespace, "namespace", "n", "", - "kubernetes namespace where the compute template is stored") - if err := cmd.MarkFlagRequired("namespace"); err != nil { - klog.Warning(err) - } - - return cmd -} - -func getComputeTemplate(name string, opts GetOptions) error { - // Get gRPC connection - conn, err := cmdutil.GetGrpcConn() - if err != nil { - return err - } - defer conn.Close() - - // build gRPC client - client := go_client.NewComputeTemplateServiceClient(conn) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - r, err := client.GetComputeTemplate(ctx, &go_client.GetComputeTemplateRequest{ - Name: name, - Namespace: opts.namespace, - }) - if err != nil { - log.Fatalf("could not list compute template %v", err) - } - - 
rows := [][]string{ - convertComputeTemplatToString(r), - } - - table := tablewriter.NewWriter(os.Stdout) - table.SetHeader([]string{"Name", "CPU", "Memory", "GPU", "GPU-Accelerator"}) - table.AppendBulk(rows) - table.Render() - - return nil -} diff --git a/cli/pkg/cmd/template/compute/list.go b/cli/pkg/cmd/template/compute/list.go deleted file mode 100644 index c5c47b68f42..00000000000 --- a/cli/pkg/cmd/template/compute/list.go +++ /dev/null @@ -1,93 +0,0 @@ -package compute - -import ( - "context" - "log" - "os" - "strconv" - "time" - - "github.com/olekukonko/tablewriter" - "github.com/ray-project/kuberay/cli/pkg/cmdutil" - "github.com/ray-project/kuberay/proto/go_client" - "github.com/spf13/cobra" -) - -type ListOptions struct { - namespace string -} - -func newCmdList() *cobra.Command { - opts := ListOptions{} - - cmd := &cobra.Command{ - Use: "list", - Short: "List all compute templates", - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - return listComputeTemplates(opts) - }, - } - - cmd.Flags().StringVarP(&opts.namespace, "namespace", "n", "", - "kubernetes namespace where the compute template is stored") - - return cmd -} - -func listComputeTemplates(opts ListOptions) error { - // Get gRPC connection - conn, err := cmdutil.GetGrpcConn() - if err != nil { - return err - } - defer conn.Close() - - // build gRPC client - client := go_client.NewComputeTemplateServiceClient(conn) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - var computeTemplates []*go_client.ComputeTemplate - if len(opts.namespace) == 0 { - r, err := client.ListAllComputeTemplates(ctx, &go_client.ListAllComputeTemplatesRequest{}) - if err != nil { - log.Fatalf("could not list all compute templates %v", err) - } - computeTemplates = r.GetComputeTemplates() - } else { - r, err := client.ListComputeTemplates(ctx, &go_client.ListComputeTemplatesRequest{ - Namespace: opts.namespace, - }) - if err != nil { - 
log.Fatalf("could not list compute templates %v", err) - } - computeTemplates = r.GetComputeTemplates() - } - rows := convertComputeTemplatesToStrings(computeTemplates) - - table := tablewriter.NewWriter(os.Stdout) - table.SetHeader([]string{"Name", "Namespace", "CPU", "Memory", "GPU", "GPU-Accelerator"}) - table.AppendBulk(rows) - table.Render() - - return nil -} - -func convertComputeTemplatesToStrings(computeTemplates []*go_client.ComputeTemplate) [][]string { - var data [][]string - - for _, r := range computeTemplates { - data = append(data, convertComputeTemplatToString(r)) - } - - return data -} - -func convertComputeTemplatToString(r *go_client.ComputeTemplate) []string { - line := []string{ - r.GetName(), r.Namespace, strconv.Itoa(int(r.GetCpu())), strconv.Itoa(int(r.Memory)), - strconv.Itoa(int(r.GetGpu())), r.GetGpuAccelerator(), - } - return line -} diff --git a/cli/pkg/cmd/template/template.go b/cli/pkg/cmd/template/template.go deleted file mode 100644 index 8ead2071725..00000000000 --- a/cli/pkg/cmd/template/template.go +++ /dev/null @@ -1,21 +0,0 @@ -package template - -import ( - "github.com/ray-project/kuberay/cli/pkg/cmd/template/compute" - "github.com/spf13/cobra" -) - -func NewCmdTemplate() *cobra.Command { - cmd := &cobra.Command{ - Use: "template ", - Short: "Manage templates (compute)", - Long: ``, - Annotations: map[string]string{ - "IsCore": "true", - }, - } - - cmd.AddCommand(compute.NewCmdComputeTemplate()) - - return cmd -} diff --git a/cli/pkg/cmd/version/release.go b/cli/pkg/cmd/version/release.go deleted file mode 100644 index 4bdc4edd0da..00000000000 --- a/cli/pkg/cmd/version/release.go +++ /dev/null @@ -1,15 +0,0 @@ -package version - -// This file was generated by release_generate.go; DO NOT EDIT. 
- -// Version is the version number in semver format X.Y.Z -var Version = "0.1.0" - -// PreReleaseID can be empty for releases, "rc.X" for release candidates and "dev" for snapshots -var PreReleaseID = "dev" - -// gitCommit is the short commit hash. It will be set by the linker. -var gitCommit = "" - -// buildDate is the time of the build with format yyyy-mm-ddThh:mm:ssZ. It will be set by the linker. -var buildDate = "" diff --git a/cli/pkg/cmd/version/version.go b/cli/pkg/cmd/version/version.go deleted file mode 100644 index aae0906d42c..00000000000 --- a/cli/pkg/cmd/version/version.go +++ /dev/null @@ -1,83 +0,0 @@ -package version - -import ( - "encoding/json" - "fmt" - "strings" - - "github.com/spf13/cobra" -) - -func NewCmdVersion() *cobra.Command { - cmd := &cobra.Command{ - Use: "version", - Short: "Output the version of kuberay", - Long: ``, - Run: func(cmd *cobra.Command, args []string) { - fmt.Printf("%s\n", GetVersion()) - }, - } - - return cmd -} - -// Info holds version information -type Info struct { - Version string - PreReleaseID string - Metadata BuildMetadata -} - -// BuildMetadata contains the semver build metadata: -// short commit hash and date in format YYYYMMDDTHHmmSS -type BuildMetadata struct { - BuildDate string - GitCommit string -} - -// GetVersionInfo returns version Info struct -func GetVersionInfo() Info { - return Info{ - Version: Version, - PreReleaseID: PreReleaseID, - Metadata: BuildMetadata{ - GitCommit: gitCommit, - BuildDate: buildDate, - }, - } -} - -// ExtraSep separates semver version from any extra version info -const ExtraSep = "-" - -// String return version info as JSON -func String() string { - if data, err := json.Marshal(GetVersionInfo()); err == nil { - return string(data) - } - return "" -} - -// GetVersion return the exact version of this build -func GetVersion() string { - if PreReleaseID == "" { - return Version - } - - versionWithPR := fmt.Sprintf("%s%s%s", Version, ExtraSep, PreReleaseID) - - if 
isReleaseCandidate(PreReleaseID) || (gitCommit == "" || buildDate == "") { - return versionWithPR - } - - // Include build metadata - return fmt.Sprintf("%s+%s.%s", - versionWithPR, - gitCommit, - buildDate, - ) -} - -func isReleaseCandidate(preReleaseID string) bool { - return strings.HasPrefix(preReleaseID, "rc.") -} diff --git a/cli/pkg/cmdutil/client.go b/cli/pkg/cmdutil/client.go deleted file mode 100644 index e63ed90e0af..00000000000 --- a/cli/pkg/cmdutil/client.go +++ /dev/null @@ -1,22 +0,0 @@ -package cmdutil - -import ( - "context" - "log" - "time" - - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" -) - -func GetGrpcConn() (*grpc.ClientConn, error) { - address := GetVal("endpoint") - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - conn, err := grpc.DialContext(ctx, address, grpc.WithTransportCredentials(insecure.NewCredentials())) - if err != nil { - log.Fatalf("can not connect: %v", err) - } - - return conn, err -} diff --git a/cli/pkg/cmdutil/configManager.go b/cli/pkg/cmdutil/configManager.go deleted file mode 100644 index 2674de2ba89..00000000000 --- a/cli/pkg/cmdutil/configManager.go +++ /dev/null @@ -1,44 +0,0 @@ -package cmdutil - -import ( - "fmt" - "os" - "reflect" - - "github.com/spf13/viper" -) - -var supportedKeys = map[string]bool{"endpoint": true} - -func validateKey(key string) { - _, ok := supportedKeys[key] - if !ok { - keys := reflect.ValueOf(supportedKeys).MapKeys() - fmt.Printf("key %s is not supported, supported keys are: %v", key, keys) - os.Exit(1) - } -} - -func SetKeyValPair(key string, value string) { - validateKey(key) - viper.Set(key, value) - if err := viper.WriteConfig(); err != nil { - fmt.Printf("Not able to write to config file %s\n", viper.ConfigFileUsed()) - fmt.Println(err) - os.Exit(1) - } -} - -func GetVal(key string) string { - validateKey(key) - return viper.GetString(key) -} - -func Reset() { - viper.Set("endpoint", fmt.Sprintf("%s:%s", 
DefaultRpcAddress, DefaultRpcPort)) - if err := viper.WriteConfig(); err != nil { - fmt.Printf("Not able to write to config file %s\n", viper.ConfigFileUsed()) - fmt.Println(err) - os.Exit(1) - } -} diff --git a/cli/pkg/cmdutil/const.go b/cli/pkg/cmdutil/const.go deleted file mode 100644 index 2ccce854091..00000000000 --- a/cli/pkg/cmdutil/const.go +++ /dev/null @@ -1,6 +0,0 @@ -package cmdutil - -const ( - DefaultRpcAddress = "127.0.0.1" - DefaultRpcPort = "8887" -) diff --git a/cli/pkg/cmdutil/errors.go b/cli/pkg/cmdutil/errors.go deleted file mode 100644 index 77ca58340a8..00000000000 --- a/cli/pkg/cmdutil/errors.go +++ /dev/null @@ -1,19 +0,0 @@ -package cmdutil - -import "errors" - -// FlagError is the kind of error raised in flag processing -type FlagError struct { - Err error -} - -func (fe FlagError) Error() string { - return fe.Err.Error() -} - -func (fe FlagError) Unwrap() error { - return fe.Err -} - -// SilentError is an error that triggers exit code 1 without any error messaging -var SilentError = errors.New("SilentError") diff --git a/clients/python-apiserver-client/.gitignore b/clients/python-apiserver-client/.gitignore index f2b07cb0b37..d6d73f9c8c8 100644 --- a/clients/python-apiserver-client/.gitignore +++ b/clients/python-apiserver-client/.gitignore @@ -32,4 +32,4 @@ htmlcov .coverage .cache nosetests.xml -coverage.xml \ No newline at end of file +coverage.xml diff --git a/clients/python-apiserver-client/LICENSE b/clients/python-apiserver-client/LICENSE deleted file mode 100644 index 1dcfa84a3fb..00000000000 --- a/clients/python-apiserver-client/LICENSE +++ /dev/null @@ -1,272 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- - -Code in python/ray/rllib/{evolution_strategies, dqn} adapted from -https://github.com/openai (MIT License) - -Copyright (c) 2016 OpenAI (http://openai.com) - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - --------------------------------------------------------------------------------- - -Code in python/ray/rllib/impala/vtrace.py from -https://github.com/deepmind/scalable_agent - -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- -Code in python/ray/rllib/ars is adapted from https://github.com/modestyachts/ARS - -Copyright (c) 2018, ARS contributors (Horia Mania, Aurelia Guy, Benjamin Recht) -All rights reserved. - -Redistribution and use of ARS in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation and/or -other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/clients/python-apiserver-client/README.md b/clients/python-apiserver-client/README.md index c141865bc58..29d7c48520c 100644 --- a/clients/python-apiserver-client/README.md +++ b/clients/python-apiserver-client/README.md @@ -2,9 +2,9 @@ This Python client is currently only supporting Ray cluster management through usage of the `API server` Ray API. It implements all of the current functionality of the API server (and the operator) and provide pythonic APIs to the capabilities. -The package supports Python objects (documented in the code ) that can be used to build and receive payloads for creation, listing and deleting of [template](https://ray-project.github.io/kuberay/components/apiserver/#compute-template) and Ray clusters. +The package supports Python objects (documented in the code) that can be used to build and receive payloads for creation, listing and deleting of [template](https://ray-project.github.io/kuberay/components/apiserver/#compute-template) and Ray clusters. -The main class of the package is [KubeRayAPIs](python_apiserver_client/kuberay_apis.py) that implements all of the functionality. 
It leverages [templates](python_apiserver_client/params/templates.py) and [cluster](python_apiserver_client/params/cluster.py) definitions, allowing to specify all required parameters as straight Python classes. Additional (intermediate) definitions are provided (see [environment variables](python_apiserver_client/params/environmentvariables.py), [volumes](python_apiserver_client/params/volumes.py), [head group](python_apiserver_client/params/headnode.py) and [worker group](python_apiserver_client/params/workernode.py)) +The main class of the package is [KubeRayAPIs](src/python_apiserver_client/kuberay_apis.py) that implements all of the functionality. It leverages [templates](src/python_apiserver_client/params/templates.py) and [cluster](src/python_apiserver_client/params/cluster.py) definitions, allowing to specify all required parameters as straight Python classes. Additional (intermediate) definitions are provided (see [cluster](src/python_apiserver_client/params/cluster.py), [environment variables](src/python_apiserver_client/params/environmentvariables.py), [volumes](src/python_apiserver_client/params/volumes.py), [head group](src/python_apiserver_client/params/headnode.py), [worker group](src/python_apiserver_client/params/workernode.py) and [job submission](src/python_apiserver_client/params/jobsubmission.py)) ## Prerequisites @@ -26,6 +26,22 @@ pip3 install -e . ## Testing -Test files [parameters_test](python_apiserver_client_test/api_params_test.py) and [api_test](python_apiserver_client_test/kuberay_api_test.py) exercise the package functionality and can also be used as a guide for API usage. +To do testing first create the current images for operator and API server, create kind cluster and install operator and API server. 
+From apiserver directory, execute: -Note that [api_test](python_apiserver_client_test/kuberay_api_test.py) requires installation of the package and creation of the additional [configmap](../../apiserver/test/job/code.yaml) in the default namespace +```shell +make operator-image cluster load-operator-image deploy-operator docker-image load-image install +``` + +Now you can use the following test files: + +* [parameters_test](test/api_params_test.py) exercise parameter creation +* [api_test](test/kuberay_api_test.py) exercise overall package functionality and can also be used as a guide for API usage. + +## Clean up + +From apiserver directory, execute: + +```shell +make clean-cluster +``` diff --git a/clients/python-apiserver-client/pyproject.toml b/clients/python-apiserver-client/pyproject.toml index dfffe74cabd..b6dec011553 100644 --- a/clients/python-apiserver-client/pyproject.toml +++ b/clients/python-apiserver-client/pyproject.toml @@ -1,21 +1,21 @@ [build-system] -requires = ["setuptools", "wheel"] +requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"] build-backend = "setuptools.build_meta" -[tool.setuptools.packages.find] -include = ["python_apiserver_client*"] -exclude = ["python_apiserver_client_test*"] +[options] +package_dir = ["src"] [project] name = "python_apiserver_client" version = "0.0.1" dependencies = [ "requests", + "kubernetes", ] authors = [ { name="KubeRay project"}, ] description = "A Kuberay python client library to manage clusters based on the KubeRay API server" -readme = "README.md" -license = {file = "LICENSE"} +readme = {file = "README.md", content-type = "text/markdown"} +license = {text = "Apache-2.0"} requires-python = ">=3.10" classifiers = [ "Programming Language :: Python :: 3", @@ -24,4 +24,4 @@ classifiers = [ ] [project.urls] -"Homepage" = "https://github.com/ray-project/kuberay" \ No newline at end of file +"Homepage" = "https://github.com/ray-project/kuberay" diff --git 
a/clients/python-apiserver-client/python_apiserver_client/__init__.py b/clients/python-apiserver-client/python_apiserver_client/__init__.py deleted file mode 100644 index 860a9027921..00000000000 --- a/clients/python-apiserver-client/python_apiserver_client/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .kuberay_apis import * diff --git a/clients/python-apiserver-client/python_apiserver_client/params/__init__.py b/clients/python-apiserver-client/python_apiserver_client/params/__init__.py deleted file mode 100644 index 45143fc0f2c..00000000000 --- a/clients/python-apiserver-client/python_apiserver_client/params/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .templates import * -from .cluster import * -from .headnode import * -from .workernode import * -from .volumes import * -from .environmentvariables import * -from .jobsubmission import * \ No newline at end of file diff --git a/clients/python-apiserver-client/python_apiserver_client/params/cluster.py b/clients/python-apiserver-client/python_apiserver_client/params/cluster.py deleted file mode 100644 index 39a1845cb62..00000000000 --- a/clients/python-apiserver-client/python_apiserver_client/params/cluster.py +++ /dev/null @@ -1,193 +0,0 @@ -from .headnode import * -from .workernode import * - -class Environment(enum.Enum): - DEV = 0 - TESTING = 1 - STAGING = 2 - PRODUCTION = 3 - - -class ClusterSpec: - """ - ClusterSpec is used to define Ray cluster. - It provides APIs to create, stringify, convert to dict and json. 
- - Methods: - - Create cluster spec from: gets the following parameters: - head_group_spec - required, specification of the head node - worker_group_spec - optional, list of worker group specs - - to_string() -> str: convert toleration to string for printing - - to_dict() -> dict[str, any] convert to dict - """ - def __init__(self, head_node: HeadNodeSpec, worker_groups: list[WorkerNodeSpec] = None) -> None: - self.head_node = head_node - self.worker_groups = worker_groups - - def to_string(self) -> str: - val = f"head_group_spec: {self.head_node.to_string()}" - if self.worker_groups is not None: - val += "\nworker groups: " - for w in self.worker_groups: - val += f"\nworker_group_spec = {w.to_string()}]" - return val - - def to_dict(self) -> dict[str, any]: - dst = {"headGroupSpec": self.head_node.to_dict()} - if self.worker_groups is not None: - dst["workerGroupSpec"] = [w.to_dict() for w in self.worker_groups] - return dst - - -class ClusterEvent: - """ - Cluster event is used to define events emitted during cluster creation. - It provides APIs to create and stringify. 
Its output only data, so we do not need to implement to_dict - - Methods: - - Create event: gets the dictionary with the following parameters: - id - unique Event Id - name - human readable event name - created_at - event creation time - first_timestamp - first time the event occur - last_timestamp - last time the event occur - reason - reason for the transition into the object's current status - message - human-readable description of the status of this operation - type - type of this event (Normal, Warning), new types could be added in the future - count - number of times this event has occurred - """ - def __init__(self, dst: dict[str, any]) -> None: - self.id = dst.get("id", "") - self.name = dst.get("name", "") - self.created_at = dst.get("created_at", "") - self.first_timestamp = dst.get("first_timestamp", "") - self.last_timestamp = dst.get("last_timestamp", "") - self.reason = dst.get("reason", "") - self.message = dst.get("message", "") - self.type = dst.get("type", "") - self.count = dst.get("count", "0") - - def to_string(self) -> str: - return (f"id = {self.id}, name = {self.name}, created_at = {self.created_at}, " - f"first_timestamp = {self.first_timestamp}, last_timestamp = {self.last_timestamp}," - f"reason = {self.reason}, message = {self.message}, type = {self.type}, count = {self.count}") - - -class Cluster: - """ - Cluster is used to define Ray cluster. - It provides APIs to create, stringify, convert to dict and json. 
- - Methods: - - Create env variable from: gets the following parameters: - name - required, unique (per namespace) cluster name - namespace - required, cluster's namespace (should exist) - user - required, user who owns the cluster - version - required, Ray cluster version - typically Ray version - deployment_environment - optional (see Environment) - cluster_spec - required, ray cluster configuration - annotations - optional, annotations, for example, "kubernetes.io/ingress.class" to define Ingress class - cluster_environment - optional, cluster environment variables - created_at - output, cluster creation ts - deleted_at - output, cluster deletion ts - cluster_status - output, cluster status - events - output, cluster events - service_endpoint - output, cluster service endpoints - - to_string() -> str: convert toleration to string for printing - - to_dict() -> dict[str, any] convert to dict - """ - def __init__(self, name: str, namespace: str, user: str, version: str, cluster_spec: ClusterSpec, - deployment_environment: Environment = None, annotations: dict[str, str] = None, - cluster_environment: EnvironmentVariables = None, created_at: str = None, - deleted_at: str = None, cluster_status: str = None, events: list[ClusterEvent] = None, - service_endpoint: dict[str, str] = None) -> None: - self.name = name - self.namespace = namespace - self.user = user - self.version = version - self.cluster_spec = cluster_spec - self.environment = deployment_environment - self.annotations = annotations - self.envs = cluster_environment - self.created_at = created_at - self.deleted_at = deleted_at - self.cluster_status = cluster_status - self.events = events - self.service_endpoint = service_endpoint - - def to_string(self) -> str: - val = (f"name: {self.name}, namespace = {self.namespace}, user = {self.user}, version = {self.version} " - f"cluster_spec = {self.cluster_spec.to_string()}") - if self.environment is not None: - val += f"deployment environment = 
{self.environment.name}" - if self.annotations is not None: - val += f" ,annotations = {str(self.annotations)}" - if self.envs is not None: - val = val + f",cluster environment = {self.envs.to_string()}" - val += "\ncluster output\n" - if self.created_at is not None: - val += f" ,created_at = {self.created_at}" - if self.deleted_at is not None: - val += f" ,deleted_at = {self.deleted_at}" - if self.cluster_status is not None: - val += f" ,cluster status = {self.cluster_status}" - if self.events is not None: - val = val + ",\n cluster events = [" - first = True - for e in self.events: - if first: - first = False - else: - val += ", " - val = val + "{" + e.to_string() + "}" - val = val + "]" - if self.service_endpoint is not None: - val += f" ,service endpoints = {str(self.service_endpoint)}" - return val - - def to_dict(self) -> dict[str, any]: - # only convert input variables - dst = {"name": self.name, "namespace": self.namespace, "user": self.user, "version": self.version, - "clusterSpec": self.cluster_spec.to_dict()} - if self.environment is not None: - dst["environment"] = self.environment.value - if self.annotations is not None: - dst["annotations"] = self.annotations - if self.envs is not None: - dst["envs"] = self.envs.to_dict() - return dst - - -""" - Creates new cluster from dictionary, used for unmarshalling json. 
Python does not - support multiple constructors, so do it this way -""" - - -def cluster_spec_decoder(dct: dict[str, any]) -> ClusterSpec: - workers = None - if "workerGroupSpec" in dct: - workers = [worker_node_spec_decoder(w) for w in dct["workerGroupSpec"]] - return ClusterSpec(head_node=head_node_spec_decoder(dct.get("headGroupSpec")), worker_groups=workers) - - -def cluster_decoder(dct: dict[str, any]) -> Cluster: - environment = None - if "environment" in dct: - environment = Environment(int(dct.get("environment", "0"))) - events = None - if "events" in dct: - events = [ClusterEvent(c) for c in dct["events"]] - envs = None - if "envs" in dct: - envs = environmentvariables_decoder(dct.get("envs")) - return Cluster(name=dct.get("name", ""), namespace=dct.get("namespace", ""), user=dct.get("user", ""), - version=dct.get("version", ""), cluster_spec=cluster_spec_decoder(dct.get("clusterSpec")), - deployment_environment=environment, annotations=dct.get("annotations"), - cluster_environment=envs, created_at=dct.get("createdAt"), deleted_at=dct.get("deletedAt"), - cluster_status=dct.get("clusterState"), events=events, - service_endpoint=dct.get("serviceEndpoint")) - - -def clusters_decoder(dct: dict[str, any]) -> list[Cluster]: - return [cluster_decoder(cluster) for cluster in dct["clusters"]] diff --git a/clients/python-apiserver-client/python_apiserver_client/params/environmentvariables.py b/clients/python-apiserver-client/python_apiserver_client/params/environmentvariables.py deleted file mode 100644 index 49762a7fcdc..00000000000 --- a/clients/python-apiserver-client/python_apiserver_client/params/environmentvariables.py +++ /dev/null @@ -1,102 +0,0 @@ -import enum - - -class EnvarSource(enum.Enum): - CONFIGMAP = 0 - SECRET = 1 - RESOURCEFIELD = 2 - FIELD = 3 - - -class EnvVarFrom: - """ - EnvVarFrom is used to define an environment variable from one of the sorces (EnvarSource). - It provides APIs to create, stringify, convert to dict and json. 
- - Methods: - - Create env variable from: gets the following parameters: - Source required - source of environment variable - name required name for config map or secret, container name for resource, path for field - key required Key for config map or secret, resource name for resource - - to_string() -> str: convert toleration to string for printing - - to_dict() -> dict[str, any] convert to dict - """ - def __init__(self, source: EnvarSource, name: str, key: str) -> None: - self.source = source - self.name = name - self.key = key - - def to_string(self) -> str: - return f"source = {self.source.name}, name = {self.name}, key = {self.key}" - - def to_dict(self) -> dict[str, any]: - return {"source": self.source.value, "name": self.name, "key": self.key} - - -class EnvironmentVariables: - """ - EnvironmentVariables is used to define environment variables. - It provides APIs to create, stringify, convert to dict and json. - - Methods: - - Create env variable from: gets the following parameters: - keyvalue - optional, dictionary of key/value environment variables - fromref - optional, dictionary of reference environment variables - - to_string() -> str: convert toleration to string for printing - - to_dict() -> dict[str, any] convert to dict - """ - def __init__(self, keyvalue: dict[str, str] = None, fromref: dict[str, EnvVarFrom] = None) -> None: - self.keyval = keyvalue - self.fromref = fromref - - def to_string(self) -> str: - val = "" - if self.keyval is not None: - val = f"values = {str(self.keyval)}" - if self.fromref is not None: - if val != "": - val += " , " - val += "valuesFrom = {" - first = True - for k, v in self.fromref.items(): - if not first: - val += ", " - else: - first = False - val += f"{k} = [{v.to_string()}]" - val += "}" - return val - - def to_dict(self) -> dict[str, any]: - dst = {} - if self.keyval is not None: - dst["values"] = self.keyval - if self.fromref is not None: - fr = {} - for k, v in self.fromref.items(): - fr[k] = v.to_dict() - 
dst["valuesFrom"] = fr - return dst - - -""" - Creates new environment variable from from dictionary, used for unmarshalling json. Python does not - support multiple constructors, so do it this way -""" - - -def envvarfrom_decoder(dct: dict[str, any]) -> EnvVarFrom: - return EnvVarFrom(name=dct.get("name", ""), source=EnvarSource(int(dct.get("source", 0))), key=dct.get("key", "")) - - -def environmentvariables_decoder(dst: dict[str, any]) -> EnvironmentVariables: - keyvalues = None - fr = None - if "values" in dst: - keyvalues = dst.get("values") - if "valuesFrom" in dst: - fromref = dst.get("valuesFrom") - fr = {} - for k, v in fromref.items(): - fr[k] = envvarfrom_decoder(v) - return EnvironmentVariables(keyvalue=keyvalues, fromref=fr) diff --git a/clients/python-apiserver-client/python_apiserver_client/params/volumes.py b/clients/python-apiserver-client/python_apiserver_client/params/volumes.py deleted file mode 100644 index 6100be229b8..00000000000 --- a/clients/python-apiserver-client/python_apiserver_client/params/volumes.py +++ /dev/null @@ -1,284 +0,0 @@ -import enum - - -class HostPath(enum.Enum): - DIRECTORY = 0 - FILE = 1 - - -class MountPropagationMode(enum.Enum): - NONE = 0 - HOSTTOCONTAINER = 1 - BIDIRECTIONAL = 2 - - -class AccessMode(enum.Enum): - RWO = 0 - ROX = 1 - RWX = 2 - - -class BaseVolume: - """ - KubeRay currently support several types of volumes, including hostPat, PVC, - ephemeral volumes, config maps, secrets and empty dir. All of them use slightly - different parameters. Base Volume is a base class for all different volume types. - """ - def to_string(self) -> str: - raise Exception(f"Base volume cannot be used directly. Pls use one of the derived classes") - - def to_dict(self) -> dict[str, any]: - raise Exception(f"Base volume cannot be used directly. Pls use one of the derived classes") - - -class HostPathVolume(BaseVolume): - """ - This class implements HostPath volume. 
In addition to name and mount path it requires host - path volume specific parameters: - source - data location on host - hostPathType - host path type: directory (0) or file (1) - mountPropagationMode - mount propagation: None (0), host to container (1) or bidirectional (2) - - """ - def __init__(self, name: str, mount_path: str, source: str, hostpathtype: HostPath = None, - mountpropagation: MountPropagationMode = None) -> None: - self.name = name - self.mount_path = mount_path - self.source = source - self.hostpathtype = hostpathtype - self.volume_type = 1 - self.mountpropagation = mountpropagation - - def to_string(self) -> str: - val = (f"name = {self.name}, mount_path = {self.mount_path}, source = {self.source}, " - f"volume type = hostPath") - if self.mountpropagation is not None: - val += f", mount propagation = {self.mountpropagation.name}" - if self.hostpathtype is not None: - val += f", host path type = {self.hostpathtype.name}" - return val - - def to_dict(self) -> dict[str, any]: - dst = {"name": self.name, "mountPath": self.mount_path, "source": self.source, - "volumeType": self.volume_type} - if self.mountpropagation is not None: - dst["mountPropagationMode"] = self.mountpropagation.value - if self.hostpathtype is not None: - dst["hostPathType"] = self.hostpathtype.value - return dst - - -class PVCVolume(BaseVolume): - """ - This class implements PVC volume. 
In addition to name and mount path it requires - PVC volume specific parameters: - source - PVC claim name - read_only - read only flag - mountPropagationMode - mount propagation: None (0), host to container (1) or bidirectional (2) - """ - def __init__(self, name: str, mount_path: str, source: str, read_only: bool = False, - mountpropagation: MountPropagationMode = None) -> None: - self.name = name - self.mount_path = mount_path - self.source = source - self.volume_type = 0 - self.mountpropagation = mountpropagation - self.readonly = read_only - - def to_string(self) -> str: - val = (f"name = {self.name}, mount_path = {self.mount_path}, source = {self.source}, " - f"volume type = PVC") - if self.readonly: - val += ", read only = True" - if self.mountpropagation is not None: - val += f", mount propagation = {self.mountpropagation.name}" - return val - - def to_dict(self) -> dict[str, any]: - dst = {"name": self.name, "mountPath": self.mount_path, "source": self.source, - "volumeType": self.volume_type} - if self.readonly: - dst["readOnly"] = True - if self.mountpropagation is not None: - dst["mountPropagationMode"] = self.mountpropagation.value - return dst - - -class EphemeralVolume(BaseVolume): - """ - This class implements Ephemeral volume. 
In addition to name and mount path it requires - Ephemeral volume specific parameters: - storage - disk size (valid k8 value, for example 5Gi) - storageClass - storage class - optional, if not specified, use default - accessMode - access mode RWO - optional ReadWriteOnce (0), ReadOnlyMany (1), ReadWriteMany (2) - mountPropagationMode - optional mount propagation: None (0), host to container (1) or bidirectional (2) - """ - def __init__(self, name: str, mount_path: str, storage: str, storage_class: str = None, - accessmode: AccessMode = None, mountpropagation: MountPropagationMode = None) -> None: - self.name = name - self.mount_path = mount_path - self.storage = storage - self.volume_type = 2 - self.mountpropagation = mountpropagation - self.storageclass = storage_class - self.accessmode = accessmode - - def to_string(self) -> str: - val = (f"name = {self.name}, mount_path = {self.mount_path}, storage = {self.storage} " - f"volume type = ephemeral") - if self.storageclass is not None: - val += f", storage class = {self.storageclass}" - if self.accessmode is not None: - val += f", access mode = {self.accessmode.name}" - if self.mountpropagation is not None: - val += f", mount propagation = {self.mountpropagation.name}" - return val - - def to_dict(self) -> dict[str, any]: - dct = {"name": self.name, "mountPath": self.mount_path, "storage": self.storage, - "volumeType": self.volume_type} - if self.storageclass is not None: - dct["storageClassName"] = self.storageclass - if self.accessmode is not None: - dct["accessMode"] = self.accessmode.value - if self.mountpropagation is not None: - dct["mountPropagationMode"] = self.mountpropagation.value - return dct - - -class EmptyDirVolume(BaseVolume): - """ - This class implements EmptyDir volume. 
In addition to name and mount path it requires - Empty Dir specific parameters: - storage - optional max storage size (valid k8 value, for example 5Gi) - """ - def __init__(self, name: str, mount_path: str, storage: str = None) -> None: - self.name = name - self.mount_path = mount_path - self.storage = storage - self.volume_type = 5 - - def to_string(self) -> str: - val = f"name = {self.name}, mount_path = {self.mount_path}, volume type = emptyDir" - if self.storage is not None: - val += f", storage = {self.storage}" - return val - - def to_dict(self) -> dict[str, any]: - dct = {"name": self.name, "mountPath": self.mount_path, "volumeType": self.volume_type} - if self.storage is not None: - dct["storage"] = self.storage - return dct - - -class ConfigMapVolume(BaseVolume): - """ - This class implements ConfigMap volume. In addition to name and mount path it requires - configMap volume specific parameters: - source - required, config map name - items - optional, key/path items (optional) - """ - def __init__(self, name: str, mount_path: str, source: str, items: dict[str, str] = None,) -> None: - self.name = name - self.mount_path = mount_path - self.source = source - self.items = items - self.volume_type = 3 - - def to_string(self) -> str: - val = (f"name = {self.name}, mount_path = {self.mount_path}, source = {self.source}, " - f"volume type = configmap") - if self.items is not None: - val = val + f", itemss = {str(self.items)}" - return val - - def to_dict(self) -> dict[str, any]: - dct = {"name": self.name, "mountPath": self.mount_path, "source": self.source, - "volumeType": self.volume_type} - if self.items is not None: - dct["items"] = self.items - return dct - - -class SecretVolume(BaseVolume): - """ - This class implements Secret volume. 
In addition to name and mount path it requires - Secret volume specific parameters: - source - required, secret name - items - optional, key/path items (optional) - """ - def __init__(self, name: str, mount_path: str, source: str, items: dict[str, str] = None,) -> None: - self.name = name - self.mount_path = mount_path - self.source = source - self.items = items - self.volume_type = 4 - - def to_string(self) -> str: - val = (f"name = {self.name}, mount_path = {self.mount_path}, source = {self.source}, " - f"volume type = secret") - if self.items is not None: - val = val + f", itemss = {str(self.items)}" - return val - - def to_dict(self) -> dict[str, any]: - dct = {"name": self.name, "mountPath": self.mount_path, "source": self.source, - "volumeType": self.volume_type} - if self.items is not None: - dct["items"] = self.items - return dct - - -""" - Creates new Volume from dictionary, used for unmarshalling json. Python does not - support multiple constructors, so do it this way -""" - - -def volume_decoder(dst: dict[str, any]) -> BaseVolume: - - def _getmountpropagatio() -> MountPropagationMode: - if "mountPropagationMode" in dst: - return MountPropagationMode(int(dst.get("mountPropagationMode", "0"))) - return None - - def _gethostpathtype() -> HostPath: - if "hostPathType" in dst: - return HostPath(int(dst.get("hostPathType", "0"))) - return None - - def _getaccessmode() -> AccessMode: - if "accessMode" in dst: - return AccessMode(int(dst.get("accessMode", "0"))) - return None - - match dst["volumeType"]: - case 0: - # PVC - return PVCVolume(name=dst.get("name", ""), mount_path=dst.get("mountPath", ""), - source=dst.get("source", ""), read_only=dst.get("readOnly", False), - mountpropagation=_getmountpropagatio()) - case 1: - # hostpath - return HostPathVolume(name=dst.get("name", ""), mount_path=dst.get("mountPath", ""), - source=dst.get("source", ""), hostpathtype=_gethostpathtype(), - mountpropagation=_getmountpropagatio()) - case 2: - # Ephemeral volume - 
return EphemeralVolume(name=dst.get("name", ""), mount_path=dst.get("mountPath", ""), - storage=dst.get("storage", ""), storage_class=dst.get("storageClassName"), - accessmode=_getaccessmode(), mountpropagation=_getmountpropagatio()) - case 3: - # ConfigMap Volume - return ConfigMapVolume(name=dst.get("name", ""), mount_path=dst.get("mountPath", ""), - source=dst.get("source", ""), items=dst.get("items")) - case 4: - # Secret Volume - return SecretVolume(name=dst.get("name", ""), mount_path=dst.get("mountPath", ""), - source=dst.get("source", ""), items=dst.get("items")) - case 5: - # Empty dir volume - return EmptyDirVolume(name=dst.get("name", ""), mount_path=dst.get("mountPath", ""), - storage=dst.get("storage")) - case default: - raise Exception(f"Unknown volume type in {dst}") diff --git a/clients/python-apiserver-client/python_apiserver_client/params/workernode.py b/clients/python-apiserver-client/python_apiserver_client/params/workernode.py deleted file mode 100644 index 18926364c75..00000000000 --- a/clients/python-apiserver-client/python_apiserver_client/params/workernode.py +++ /dev/null @@ -1,122 +0,0 @@ -from .volumes import * -from .environmentvariables import * - -DEFAULT_WORKER_START_PARAMS = {"node-ip-address": "$MY_POD_IP"} - - -class WorkerNodeSpec: - """ - WorkerNodeSpec is used to define Ray cluster worker node pool configuration. - It provides APIs to create, stringify and convert to dict. 
- - Methods: - - Create worker node pool specification: gets the following parameters: - group_name - required, group name of the worker group - compute_template - required, the computeTemplate of head node group - replicas - required, desired replicas of the worker group - min_replicas - required Min replicas of the worker group, can't be greater than max_replicas - max_replicas - required, max replicas of the worker group - ray_start_params - required, Ray start parameters - image - optional, image used for head node - volumes - optional, a list of volumes to attach to head node - service_account - optional, a service account (has to exist) to run head node - image_pull_secret - optional, secret to pull head node image from registry - environment - optional, environment variables for head pod - annotations - optional, annotations for head node - labels - optional, labels for head node - """ - def __init__(self, group_name: str, compute_template: str, replicas: int, min_replicas: int, max_replicas: int, - ray_start_params: dict[str, str], image: str = None, - volumes: list[BaseVolume] = None, service_account: str = None, image_pull_secret: str = None, - environment: EnvironmentVariables = None, annotations: dict[str, str] = None, - labels: dict[str, str] = None) -> None: - - # Validate replicas - if min_replicas > replicas: - raise RuntimeError(f"min_replicas {min_replicas} is can't be greater then replicas {replicas} ") - if replicas > max_replicas: - raise RuntimeError(f"replicas {replicas} is can't be greater then max_replicas {max_replicas} ") - - self.group_name = group_name - self.compute_template = compute_template - self.replicas = replicas - self.min_replicas = min_replicas - self.max_replicas = max_replicas - self.ray_start_params = ray_start_params - self.ray_start_params.update(DEFAULT_WORKER_START_PARAMS) - self.image = image - self.volumes = volumes - self.service_account = service_account - self.image_pull_secret = image_pull_secret - 
self.environment = environment - self.annotations = annotations - self.labels = labels - - def to_string(self) -> str: - val = (f"group_name = {self.group_name}, compute template = {self.compute_template}, " - f"replicas = {self.replicas}, min_replicas = {self.min_replicas}, " - f"max_replicas = {self.max_replicas}, ray start params = {str(self.ray_start_params)}") - if self.image is not None: - val += f", image = {self.image}" - if self.service_account is not None: - val += f", service_account = {self.service_account}" - if self.image_pull_secret is not None: - val += f", image_pull_secret = {self.image_pull_secret}" - if self.volumes is not None: - val = val + ",\n volumes = [" - first = True - for v in self.volumes: - if first: - first = False - else: - val += ", " - val = val + "{" + v.to_string() + "}" - val = val + "]" - if self.environment is not None: - val = val + f",\n environment = {self.environment.to_string()}" - if self.annotations is not None: - val = val + f",\n annotations = {str(self.annotations)}" - if self.labels is not None: - val = val + f",\n labels = {str(self.labels)}" - return val - - def to_dict(self) -> dict[str, any]: - dct = {"groupName": self.group_name, "computeTemplate": self.compute_template, - "replicas": self.replicas, "minReplicas": self.min_replicas, "maxReplicas": self.max_replicas, - "rayStartParams": self.ray_start_params} - if self.image is not None: - dct["image"] = self.image - if self.service_account is not None: - dct["service_account"] = self.service_account - if self.image_pull_secret is not None: - dct["imagePullSecret"] = self.image_pull_secret - if self.volumes is not None: - dct["volumes"] = [v.to_dict() for v in self.volumes] - if self.environment is not None: - dct["environment"] = self.environment.to_dict() - if self.annotations is not None: - dct["annotations"] = self.annotations - if self.labels is not None: - dct["labels"] = self.labels - return dct - - -""" - Creates new head node from dictionary, used for 
unmarshalling json. Python does not - support multiple constructors, so do it this way -""" - - -def worker_node_spec_decoder(dct: dict[str, any]) -> WorkerNodeSpec: - volumes = None - if "volumes" in dct: - volumes = [volume_decoder(v) for v in dct["volumes"]] - environments = None - if "environment" in dct and len(dct.get("environment")) > 0: - environments = environmentvariables_decoder(dct.get("environment")) - return WorkerNodeSpec(group_name=dct.get("groupName"), compute_template=dct.get("computeTemplate"), - replicas=dct.get("replicas", 0), min_replicas=dct.get("minReplicas", 0), - max_replicas=dct.get("maxReplicas", 0), ray_start_params=dct.get("rayStartParams"), - image=dct.get("image"), volumes=volumes, service_account=dct.get("service_account"), - image_pull_secret=dct.get("imagePullSecret"), environment=environments, - annotations=dct.get("annotations"), labels=dct.get("labels")) diff --git a/clients/python-apiserver-client/python_apiserver_client_test/api_params_test.py b/clients/python-apiserver-client/python_apiserver_client_test/api_params_test.py deleted file mode 100644 index a8e63455249..00000000000 --- a/clients/python-apiserver-client/python_apiserver_client_test/api_params_test.py +++ /dev/null @@ -1,237 +0,0 @@ -import json -from python_apiserver_client import * - - -def test_toleration(): - - tol1 = Toleration(key="blah1", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute) - print(f"\ntoleration 1: {tol1.to_string()}") - t1_json = json.dumps(tol1.to_dict()) - print(f"toleration 1 JSON: {t1_json}") - - tol2 = Toleration(key="blah2", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute, - value="value") - print(f"toleration 2: {tol2.to_string()}") - t2_json = json.dumps(tol2.to_dict()) - print(f"toleration 2 JSON: {t2_json}") - - assert tol1.to_string() == toleration_decoder(json.loads(t1_json)).to_string() - assert tol2.to_string() == toleration_decoder(json.loads(t2_json)).to_string() - - -def 
test_templates(): - - tol1 = Toleration(key="blah1", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute) - tol2 = Toleration(key="blah2", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute, - value="value") - - temp1 = Template(name="template1", namespace="namespace", cpu=1, memory=4, tolerations=[tol1, tol2]) - print(f"\ntemplate 1: {temp1.to_string()}") - tm1_json = json.dumps(temp1.to_dict()) - print(f"template 1 JSON: {tm1_json}") - - temp2 = Template(name="template2", namespace="namespace", cpu=2, memory=8, gpu=1) - print(f"template 2: {temp2.to_string()}") - tm2_json = json.dumps(temp2.to_dict()) - print(f"template 2 JSON: {tm2_json}") - - assert temp1.to_string() == template_decoder(json.loads(tm1_json)).to_string() - assert temp2.to_string() == template_decoder(json.loads(tm2_json)).to_string() - - -def test_volumes(): - - # hostPath - vol = HostPathVolume(name="hostPath", mount_path="tmp/hostPath", source="source", - hostpathtype=HostPath.FILE, mountpropagation=MountPropagationMode.NONE) - print(f"\nhostPath volume: {vol.to_string()}") - vol_json = json.dumps(vol.to_dict()) - print(f"host path volume json: {vol_json}") - assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() - - vol = PVCVolume(name="pvc", mount_path="tmp/pvc", source="claim", read_only=True, - mountpropagation=MountPropagationMode.BIDIRECTIONAL) - print(f"PVC volume: {vol.to_string()}") - vol_json = json.dumps(vol.to_dict()) - print(f"PVC volume json: {vol_json}") - assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() - - vol = EphemeralVolume(name="ephemeral", mount_path="tmp/ephemeral", storage="5Gi", storage_class="blah", - accessmode=AccessMode.RWX) - print(f"Ephemeral volume: {vol.to_string()}") - vol_json = json.dumps(vol.to_dict()) - print(f"Ephemeral volume json: {vol_json}") - assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() - - vol = EmptyDirVolume(name="emptyDir", 
mount_path="tmp/emptyDir") - print(f"Empty dir volume: {vol.to_string()}") - vol_json = json.dumps(vol.to_dict()) - print(f"Empty dir volume json: {vol_json}") - assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() - - vol = ConfigMapVolume(name="confmap", mount_path="tmp/confmap", source="my-map", - items={"sample_code.py": "sample_code.py"}) - print(f"config map volume: {vol.to_string()}") - vol_json = json.dumps(vol.to_dict()) - print(f"config map volume json: {vol_json}") - assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() - - vol = SecretVolume(name="secret", mount_path="tmp/secret", source="my-secret") - print(f"secret volume: {vol.to_string()}") - vol_json = json.dumps(vol.to_dict()) - print(f"secret volume json: {vol_json}") - assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() - - -def test_environment(): - - env_v = EnvVarFrom(source=EnvarSource.SECRET, name="my-secret", key="key") - print(f"\nEnv variable from: {env_v.to_string()}") - env_v_json = json.dumps(env_v.to_dict()) - print(f"Env variable from JSON: {env_v_json}") - assert envvarfrom_decoder(json.loads(env_v_json)).to_string() == env_v.to_string() - - envs = EnvironmentVariables(keyvalue={"key": "val"}, fromref={"key_ref": env_v}) - print(f"Env variables: {envs.to_string()}") - envs_json = json.dumps(envs.to_dict()) - print(f"Env variables JSON: {envs_json}") - assert environmentvariables_decoder(json.loads(envs_json)).to_string() == envs.to_string() - - envs = EnvironmentVariables(fromref={"key_ref": env_v}) - print(f"Env variables: {envs.to_string()}") - envs_json = json.dumps(envs.to_dict()) - print(f"Env variables JSON: {envs_json}") - assert environmentvariables_decoder(json.loads(envs_json)).to_string() == envs.to_string() - - envs = EnvironmentVariables(keyvalue={"key": "val"}) - print(f"Env variables: {envs.to_string()}") - envs_json = json.dumps(envs.to_dict()) - print(f"Env variables JSON: {envs_json}") - assert 
environmentvariables_decoder(json.loads(envs_json)).to_string() == envs.to_string() - - -def test_head_node_spec(): - - env_v = EnvVarFrom(source=EnvarSource.SECRET, name="my-secret", key="key") - env_s = EnvironmentVariables(keyvalue={"key": "val"}, fromref={"key_ref": env_v}) - volumes = [PVCVolume(name="pvc", mount_path="tmp/pvc", source="claim", read_only=True, - mountpropagation=MountPropagationMode.BIDIRECTIONAL), - EmptyDirVolume(name="emptyDir", mount_path="tmp/emptyDir")] - - head = HeadNodeSpec(compute_template="template", ray_start_params=DEFAULT_HEAD_START_PARAMS, - enable_ingress=True, service_type=ServiceType.ClusterIP, volumes=volumes, - environment=env_s) - print(f"\nhead node: {head.to_string()}") - head_json = json.dumps(head.to_dict()) - print(f"head node JSON: {head_json}") - assert head_node_spec_decoder(json.loads(head_json)).to_string() == head.to_string() - - -def test_worker_node_spec(): - - env_v = EnvVarFrom(source=EnvarSource.SECRET, name="my-secret", key="key") - env_s = EnvironmentVariables(keyvalue={"key": "val"}, fromref={"key_ref": env_v}) - volumes = [PVCVolume(name="pvc", mount_path="tmp/pvc", source="claim", read_only=True, - mountpropagation=MountPropagationMode.BIDIRECTIONAL), - EmptyDirVolume(name="emptyDir", mount_path="tmp/emptyDir")] - - worker = WorkerNodeSpec(group_name="group", compute_template="template", replicas=2, min_replicas=2, - max_replicas=2, ray_start_params=DEFAULT_WORKER_START_PARAMS, volumes=volumes, - environment=env_s, labels={"key": "value"}) - print(f"\nworker node: {worker.to_string()}") - worker_json = json.dumps(worker.to_dict()) - print(f"worker node JSON: {worker_json}") - assert worker_node_spec_decoder(json.loads(worker_json)).to_string() == worker.to_string() - - -def test_cluster_spec(): - env_s = EnvironmentVariables(keyvalue={"key": "val"}, - fromref={"key_ref": EnvVarFrom(source=EnvarSource.SECRET, - name="my-secret", key="key")}) - volumes = [PVCVolume(name="pvc", mount_path="tmp/pvc", 
source="claim", read_only=True, - mountpropagation=MountPropagationMode.BIDIRECTIONAL), - EmptyDirVolume(name="emptyDir", mount_path="tmp/emptyDir")] - spec = ClusterSpec(head_node=HeadNodeSpec(compute_template="template", ray_start_params=DEFAULT_HEAD_START_PARAMS, - enable_ingress=True, service_type=ServiceType.ClusterIP, volumes=volumes, - environment=env_s), - worker_groups=[WorkerNodeSpec(group_name="group", compute_template="template", replicas=2, - min_replicas=2, max_replicas=2, - ray_start_params=DEFAULT_WORKER_START_PARAMS, volumes=volumes, - environment=env_s, labels={"key": "value"}), - WorkerNodeSpec(group_name="group1", compute_template="template1", replicas=2, - min_replicas=2, max_replicas=2, - ray_start_params=DEFAULT_WORKER_START_PARAMS, volumes=volumes, - environment=env_s, labels={"key": "value"})]) - print(f"\ncluster spec: {spec.to_string()}") - spec_json = json.dumps(spec.to_dict()) - print(f"cluster spec JSON: {spec_json}") - assert cluster_spec_decoder(json.loads(spec_json)).to_string() == spec.to_string() - - -def test_cluster(): - - event = {"id": "id", "name": "name", "created_at": "ts", "first_timestamp": "ts", "last_timestamp": "ts", - "reason": "reason", "message": "message", "type": "warning", "count": "1"} - print(f"\ncluster event: {ClusterEvent(event).to_string()}") - env_s = EnvironmentVariables(keyvalue={"key": "val"}, - fromref={"key_ref": EnvVarFrom(source=EnvarSource.SECRET, name="my-secret", - key="key")}) - volumes = [PVCVolume(name="pvc", mount_path="tmp/pvc", source="claim", read_only=True, - mountpropagation=MountPropagationMode.BIDIRECTIONAL), - EmptyDirVolume(name="emptyDir", mount_path="tmp/emptyDir")] - spec = ClusterSpec(head_node=HeadNodeSpec(compute_template="template", ray_start_params=DEFAULT_HEAD_START_PARAMS, - enable_ingress=True, service_type=ServiceType.ClusterIP, volumes=volumes, - environment=env_s, annotations={"a_key": "a_val"}), - worker_groups=[WorkerNodeSpec(group_name="group", 
compute_template="template", replicas=2, - min_replicas=2, max_replicas=2, - ray_start_params=DEFAULT_WORKER_START_PARAMS, volumes=volumes, - environment=env_s, labels={"key": "value"}), - WorkerNodeSpec(group_name="group1", compute_template="template1", replicas=2, - min_replicas=2, max_replicas=2, - ray_start_params=DEFAULT_WORKER_START_PARAMS, volumes=volumes, - environment=env_s, labels={"key": "value"})]) - cluster = Cluster(name="test", namespace="default", user="boris", version="2.9.0", cluster_spec=spec, - deployment_environment=Environment.DEV, cluster_environment=env_s) - print(f"cluster: {cluster.to_string()}") - cluster_json = json.dumps(cluster.to_dict()) - print(f"cluster JSON: {cluster_json}") - assert cluster_decoder(json.loads(cluster_json)).to_string() == cluster.to_string() - - cluster_dict = cluster.to_dict() - cluster_dict["created_at"] = "created" - cluster_dict["created_status"] = "status" - cluster_dict["events"] = [event] - print(f"cluster with output: {cluster_decoder(cluster_dict).to_string()}") - - -def test_submission(): - yaml = """ - pip: - - requests==2.26.0 - - pendulum==2.1.2 - env_vars: - counter_name: test_counter - """ - request = RayJobRequest(entrypoint="python /home/ray/samples/sample_code.py", - runtime_env=yaml, num_cpu=.5) - print(f"job request: {request.to_string()}") - request_json = json.dumps(request.to_dict()) - print(f"request JSON: {request_json}") - - infoJson = """ - { - "entrypoint":"python /home/ray/samples/sample_code.py", - "jobId":"02000000", - "submissionId":"raysubmit_KWZLwme56esG3Wcr", - "status":"SUCCEEDED", - "message":"Job finished successfully.", - "startTime":"1699442662879", - "endTime":"1699442682405", - "runtimeEnv":{ - "env_vars":"map[counter_name:test_counter]", - "pip":"[requests==2.26.0 pendulum==2.1.2]" - } - } - """ - job_info = RayJobInfo(json.loads(infoJson)) - print(job_info.to_string()) diff --git a/clients/python-apiserver-client/python_apiserver_client_test/kuberay_api_scaling_test.py 
b/clients/python-apiserver-client/python_apiserver_client_test/kuberay_api_scaling_test.py deleted file mode 100644 index ea27e033034..00000000000 --- a/clients/python-apiserver-client/python_apiserver_client_test/kuberay_api_scaling_test.py +++ /dev/null @@ -1,51 +0,0 @@ -from python_apiserver_client import * -import time - -def test_templates(): - apis = KubeRayAPIs() - print() - start = time.time() - # create - toleration = Toleration(key="blah1", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute) - template_base_name = "test-template" - namespaces = ["default", "test1", "test2", "test3"] - ns_index = 0 - n_templates = 100 - for i in range(n_templates): - tname = f"{template_base_name}-{i}" - template = Template(name=tname, namespace=namespaces[ns_index], cpu=2, memory=8, tolerations=[toleration]) - status, error = apis.create_compute_template(template) - assert status == 200 - assert error is None - ns_index += 1 - if ns_index >= len(namespaces): - ns_index = 0 -# print(f"template {tname} is created") - print(f"created {n_templates} templates in {time.time() - start} sec") - start = time.time() - # list for all ns - status, error, templates = apis.list_compute_templates() - assert status == 200 - assert error is None - print(f"listed {len(templates)} templates in {time.time() - start} sec") - start = time.time() - # list for individual ns - for ns in namespaces: - status, error, templates = apis.list_compute_templates_namespace(ns=ns) - assert status == 200 - assert error is None - print(f"listed {len(templates)} templates in {ns} ns") - print(f"listed templates from individual ns in {time.time() - start} sec") - start = time.time() - # delete - ns_index = 0 - for i in range(n_templates): - tname = f"{template_base_name}-{i}" - status, error = apis.delete_compute_template(ns=namespaces[ns_index], name=tname) - assert status == 200 - assert error is None - ns_index += 1 - if ns_index >= len(namespaces): - ns_index = 0 -# print(f"template 
{tname} is deleted") - print(f"Deleted {n_templates} templates in {time.time() - start} sec") diff --git a/clients/python-apiserver-client/python_apiserver_client_test/kuberay_api_test.py b/clients/python-apiserver-client/python_apiserver_client_test/kuberay_api_test.py deleted file mode 100644 index 311ed81d4bf..00000000000 --- a/clients/python-apiserver-client/python_apiserver_client_test/kuberay_api_test.py +++ /dev/null @@ -1,184 +0,0 @@ -import time - -from python_apiserver_client import * - -def test_templates(): - apis = KubeRayAPIs(token="12345") - # create - toleration = Toleration(key="blah1", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute) - template = Template(name="test-template", namespace="default", cpu=2, memory=8, tolerations=[toleration]) - status, error = apis.create_compute_template(template) - assert status == 200 - assert error is None - # duplicate create should fail - status, error = apis.create_compute_template(template) - assert status != 200 - assert error is not None - print(f"\nstatus {status}, error code: {str(error)}") - # get - status, error, t = apis.get_compute_template(ns="default", name="test-template") - assert status == 200 - assert error is None - assert template.to_string() == t.to_string() - # list - status, error, templates = apis.list_compute_templates() - assert status == 200 - assert error is None - assert template.to_string() == templates[0].to_string() - # list ns - status, error, templates = apis.list_compute_templates_namespace(ns="default") - assert status == 200 - assert error is None - assert template.to_string() == templates[0].to_string() - # delete - status, error = apis.delete_compute_template(ns="default", name="test-template") - assert status == 200 - assert error is None - # duplicate delete should fail - status, error = apis.delete_compute_template(ns="default", name="test-template") - assert status != 200 - assert error is not None - print(f"status: {status}, err = {str(error)}") - 
- -def test_cluster(): - apis = KubeRayAPIs(token="12345") - # Create template first - template = Template(name="default-template", namespace="default", cpu=2, memory=4) - status, error = apis.create_compute_template(template) - assert status == 200 - assert error is None - # cluster - volume = ConfigMapVolume(name="code-sample", mount_path="/home/ray/samples", source="ray-job-code-sample", - items={"sample_code.py": "sample_code.py"}) - environment = EnvironmentVariables(keyvalue={"key": "value"}) - head = HeadNodeSpec(compute_template="default-template", - ray_start_params={"metrics-export-port": "8080", "num-cpus": "0"}, - image="rayproject/ray:2.9.0-py310", service_type=ServiceType.ClusterIP, - volumes=[volume], environment=environment) - worker = WorkerNodeSpec(group_name="small", compute_template="default-template", replicas=1, - min_replicas=1, max_replicas=1, ray_start_params=DEFAULT_WORKER_START_PARAMS, - image="rayproject/ray:2.9.0-py310", volumes=[volume], environment=environment) - cluster = Cluster(name="test", namespace="default", user="boris", version="2.9.0", - cluster_spec=ClusterSpec(head_node=head, worker_groups=[worker])) - # create - status, error = apis.create_cluster(cluster) - assert status == 200 - assert error is None - # get - status, error, c = apis.get_cluster(ns="default", name="test") - assert status == 200 - assert error is None - print(f"\ngot cluster: {c.to_string()}") - # list - status, error, clusters = apis.list_clusters() - assert status == 200 - assert error is None - assert len(clusters) == 1 - print(f"got cluster: {clusters[0].to_string()}") - # list namespace - status, error, clusters = apis.list_clusters_namespace(ns="default") - assert status == 200 - assert error is None - assert len(clusters) == 1 - print(f"got cluster: {clusters[0].to_string()}") - # get cluster status - status, error, cs = apis.get_cluster_status(ns="default", name="test") - assert status == 200 - assert error is None - print(f"cluster status is 
{cs}") - # Wait for the cluster to get ready - status, error = apis.wait_cluster_ready(ns="default", name="test") - assert status == 200 - assert error is None - # get endpoints - status, error, endpoint = apis.get_cluster_endpoints(ns="default", name="test") - assert status == 200 - assert error is None - print(f"cluster endpoints is {endpoint}") - # delete cluster - status, error = apis.delete_cluster(ns="default", name="test") - assert status == 200 - assert error is None - # delete template - status, error = apis.delete_compute_template(ns="default", name="default-template") - assert status == 200 - assert error is None - -def test_job_submission(): - apis = KubeRayAPIs() - # Create template first - template = Template(name="default-template", namespace="default", cpu=2, memory=4) - status, error = apis.create_compute_template(template) - assert status == 200 - assert error is None - # cluster - volume = ConfigMapVolume(name="code-sample", mount_path="/home/ray/samples", source="ray-job-code-sample", - items={"sample_code.py": "sample_code.py"}) - environment = EnvironmentVariables(keyvalue={"key": "value"}) - head = HeadNodeSpec(compute_template="default-template", - ray_start_params={"metrics-export-port": "8080", "num-cpus": "0"}, - image="rayproject/ray:2.9.0-py310", service_type=ServiceType.ClusterIP, - volumes=[volume], environment=environment) - worker = WorkerNodeSpec(group_name="small", compute_template="default-template", replicas=1, - min_replicas=1, max_replicas=1, ray_start_params=DEFAULT_WORKER_START_PARAMS, - image="rayproject/ray:2.9.0-py310", volumes=[volume], environment=environment) - cluster = Cluster(name="test-job", namespace="default", user="boris", version="2.9.0", - cluster_spec=ClusterSpec(head_node=head, worker_groups=[worker])) - # create - status, error = apis.create_cluster(cluster) - assert status == 200 - assert error is None - # Wait for the cluster to get ready - status, error = apis.wait_cluster_ready(ns="default", 
name="test-job") - assert status == 200 - assert error is None - # submit Ray job - resource_yaml = """ - pip: - - requests==2.26.0 - - pendulum==2.1.2 - env_vars: - counter_name: test_counter - """ - jobRequest = RayJobRequest(entrypoint="python /home/ray/samples/sample_code.py", - runtime_env=resource_yaml, num_cpu=.5) - status, error, sid = apis.submit_job(ns="default", name="test-job", jobrequest=jobRequest) - assert status == 200 - assert error is None - # get Ray job info - status, error, jinfo = apis.get_job_info(ns="default", name="test-job", sid=sid) - assert status == 200 - assert error is None - print(f"\njobs info {jinfo.to_string()}") - # get Ray jobs info - status, error, jinfos = apis.list_job_info(ns="default", name="test-job") - assert status == 200 - assert error is None - print("jobs info") - for inf in jinfos: - print(f" {inf.to_string()}") - # get Ray job log - time.sleep(5) # wait till log is available - status, error, jlog = apis.get_job_log(ns="default", name="test-job", sid=sid) - assert status == 200 - assert error is None - print(f"job log {jlog}") - # stop Ray job - status, error = apis.stop_ray_job(ns="default", name="test-job", sid=sid) - assert status == 200 - assert error is None - # delete Ray job - status, error = apis.delete_ray_job(ns="default", name="test-job", sid=sid) - assert status == 200 - assert error is None - # delete cluster - status, error = apis.delete_cluster(ns="default", name="test-job") - assert status == 200 - assert error is None - # delete template - status, error = apis.delete_compute_template(ns="default", name="default-template") - assert status == 200 - assert error is None - - diff --git a/clients/python-apiserver-client/src/python_apiserver_client/__init__.py b/clients/python-apiserver-client/src/python_apiserver_client/__init__.py new file mode 100644 index 00000000000..e6cdbec9aa2 --- /dev/null +++ b/clients/python-apiserver-client/src/python_apiserver_client/__init__.py @@ -0,0 +1 @@ +from 
python_apiserver_client.kuberay_apis import KubeRayAPIs diff --git a/clients/python-apiserver-client/python_apiserver_client/kuberay_apis.py b/clients/python-apiserver-client/src/python_apiserver_client/kuberay_apis.py similarity index 70% rename from clients/python-apiserver-client/python_apiserver_client/kuberay_apis.py rename to clients/python-apiserver-client/src/python_apiserver_client/kuberay_apis.py index 3ea87f8823d..7266c2ac3a3 100644 --- a/clients/python-apiserver-client/python_apiserver_client/kuberay_apis.py +++ b/clients/python-apiserver-client/src/python_apiserver_client/kuberay_apis.py @@ -1,193 +1,207 @@ -import requests import time -from .params import * - +import requests +from python_apiserver_client.params import ( + Cluster, + RayJobInfo, + RayJobRequest, + Template, + cluster_decoder, + clusters_decoder, + template_decoder, + templates_decoder, +) _headers = {"Content-Type": "application/json", "accept": "application/json"} +CONNECT_TIMEOUT = 50 +READ_TIMEOUT = 50 +TIMEOUT = (CONNECT_TIMEOUT, READ_TIMEOUT) + class KubeRayAPIs: """ - This class implements KubeRay APIs based on the API server. - To create a class, the following parameters are required: - base - the URL of the API server (default is set to the standalone API server) - wait interval - the amount of sec to wait between checking for cluster ready - """ - def __init__(self, base: str = "http://localhost:31888", token: str = None, - wait_interval: int = 2) -> None: - self.base = base + This class implements KubeRay APIs based on the API server. 
+ To create a class, the following parameters are required: + base - the URL of the API server (default is set to the standalone API server) + wait interval - the amount of sec to wait between checking for cluster ready + """ + def __init__( + self, + server_url: str = "http://localhost:31888", + token: str = None, + wait_interval: int = 2 + ) -> None: + """ + Initialization + :param server_url: API server url + :param token: token, only used for API server with security enabled + :param wait_interval: wait interval + """ + self.base = server_url if token is not None: _headers["Authorization"] = token self.wait_interval = wait_interval self.api_base = "/apis/v1/" - """ - List compute templates across all namespaces of the k8 cluster - Returns: + def list_compute_templates(self) -> tuple[int, str, list[Template]]: + """ + List compute templates across all namespaces of the k8 cluster + :return: tuple containing http return code message - only returned if http return code is not equal to 200 list of compute templates - """ - def list_compute_templates(self) -> tuple[int, str, list[Template]]: + """ # Execute HTTP request url = self.base + self.api_base + "compute_templates" - response = requests.get(url, headers=_headers, timeout=None) + response = requests.get(url, headers=_headers, timeout=TIMEOUT) # Check execution status if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None return response.status_code, None, templates_decoder(response.json()) - """ - List compute templates across for a given namespaces of the k8 cluster - Parameter: - namespace to query - Returns: + def list_compute_templates_namespace(self, ns: str) -> tuple[int, str, list[Template]]: + """ + List compute templates across for a given namespaces of the k8 cluster + :param ns: namespace to query + :return: return tuple containing http return code message - only returned if http return code is not equal to 200 list of compute templates - """ - def 
list_compute_templates_namespace(self, ns: str) -> tuple[int, str, list[Template]]: + """ # Execute HTTP request url = self.base + self.api_base + f"namespaces/{ns}/compute_templates" - response = requests.get(url, headers=_headers, timeout=(10, 10)) + response = requests.get(url, headers=_headers, timeout=TIMEOUT) # Check execution status if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None return response.status_code, None, templates_decoder(response.json()) - """ + def get_compute_template(self, ns: str, name: str) -> tuple[int, str, Template]: + """ get a compute template - Parameter: - namespace - template name - Returns: + :param ns: namespace + :param name: template name + :return: tuple containing http return code message - only returned if http return code is not equal to 200 compute templates - """ - def get_compute_template(self, ns: str, name: str) -> tuple[int, str, Template]: + """ # Execute HTTP request url = self.base + self.api_base + f"namespaces/{ns}/compute_templates/{name}" - response = requests.get(url, headers=_headers, timeout=(10, 10)) + response = requests.get(url, headers=_headers, timeout=TIMEOUT) # Check execution status if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None return response.status_code, None, template_decoder(response.json()) - """ + def create_compute_template(self, template: Template) -> tuple[int, str]: + """ Create a compute template - Parameter: - template - definition of a template - Returns: + :param template - definition of a template + :return: a tuple containing http return code message - only returned if http return code is not equal to 200 - """ - def create_compute_template(self, template: Template) -> tuple[int, str]: + """ # Execute HTTP request url = self.base + self.api_base + f"namespaces/{template.namespace}/compute_templates" - response = requests.post(url, json=template.to_dict(), headers=_headers, timeout=(10, 
10)) + response = requests.post(url, json=template.to_dict(), headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"] return response.status_code, None - """ + def delete_compute_template(self, ns: str, name: str) -> tuple[int, str]: + """ delete a compute template - Parameter: - namespace - template name - Returns: + :param ns: namespace + :param name: template name + :returns: a tuple containing http return code message - only returned if http return code is not equal to 200 - """ - def delete_compute_template(self, ns: str, name: str) -> tuple[int, str]: + """ # Execute HTTP request url = self.base + self.api_base + f"namespaces/{ns}/compute_templates/{name}" - response = requests.delete(url, headers=_headers, timeout=(10, 10)) + response = requests.delete(url, headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"] return response.status_code, None - """ - List clusters across all namespaces of the k8 cluster - Returns: + def list_clusters(self) -> tuple[int, str, list[Cluster]]: + """ + List clusters across all namespaces of the k8 cluster + :returns: a tuple containing http return code message - only returned if http return code is not equal to 200 list of clusters - """ - def list_clusters(self) -> tuple[int, str, list[Cluster]]: + """ # Execute HTTP request url = self.base + self.api_base + "clusters" - response = requests.get(url, headers=_headers, timeout=None) + response = requests.get(url, headers=_headers, timeout=TIMEOUT) # Check execution status if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None return response.status_code, None, clusters_decoder(response.json()) - """ - List clusters across for a given namespaces of the k8 cluster - Parameter: - namespace to query - Returns: + def list_clusters_namespace(self, ns: str) -> tuple[int, str, list[Cluster]]: + """ + 
List clusters across for a given namespaces of the k8 cluster + :param ns: namespace to query + :return: a tuple containing http return code message - only returned if http return code is not equal to 200 list of clusters - """ - def list_clusters_namespace(self, ns: str) -> tuple[int, str, list[Cluster]]: + """ # Execute HTTP request url = self.base + self.api_base + f"namespaces/{ns}/clusters" - response = requests.get(url, headers=_headers, timeout=(10, 10)) + response = requests.get(url, headers=_headers, timeout=TIMEOUT) # Check execution status if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None return response.status_code, None, clusters_decoder(response.json()) - """ + def get_cluster(self, ns: str, name: str) -> tuple[int, str, Cluster]: + """ get cluster - Parameter: - namespace to query - name of the cluster - Returns: + :param ns: namespace + :param name: name of the cluster + :return: a tuple containing http return code message - only returned if http return code is not equal to 200 clusters definition - """ - def get_cluster(self, ns: str, name: str) -> tuple[int, str, Cluster]: + """ # Execute HTTP request url = self.base + self.api_base + f"namespaces/{ns}/clusters/{name}" - response = requests.get(url, headers=_headers, timeout=(10, 10)) + response = requests.get(url, headers=_headers, timeout=TIMEOUT) # Check execution status if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None return response.status_code, None, cluster_decoder(response.json()) - """ + def create_cluster(self, cluster: Cluster) -> tuple[int, str]: + """ create cluster - Parameter: - cluster definition - Returns: + :param cluster: cluster definition + :return: tuple containing http return code message - only returned if http return code is not equal to 200 - """ - def create_cluster(self, cluster: Cluster) -> tuple[int, str]: + """ # Execute HTTP request url = self.base + self.api_base + 
f"namespaces/{cluster.namespace}/clusters" - response = requests.post(url, json=cluster.to_dict(), headers=_headers, timeout=(10, 10)) + response = requests.post(url, json=cluster.to_dict(), headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"] return response.status_code, None - """ + def get_cluster_status(self, ns: str, name: str) -> tuple[int, str, str]: + """ get cluster status - Parameter: - namespace of the cluster - name of the cluster - Returns: + :param ns: namespace of the cluster + :param name: name of the cluster + :return: a tuple containing http return code message - only returned if http return code is not equal to 200 cluster status - """ - def get_cluster_status(self, ns: str, name: str) -> tuple[int, str, str]: + """ # Execute HTTP request status, error, cluster = self.get_cluster(ns=ns, name=name) # Check execution status @@ -198,18 +212,17 @@ def get_cluster_status(self, ns: str, name: str) -> tuple[int, str, str]: cluster_status = cluster.cluster_status return status, None, cluster_status - """ - wait cluster ready - Parameter: - namespace of the cluster - name of the cluster - wait time (-1 waits forever) - Returns: + def wait_cluster_ready(self, ns: str, name: str, wait: int = -1) -> tuple[int, str]: + """ + wait for cluster to be ready + :param ns: namespace of the cluster + :param name: name of the cluster + :param wait: wait time (-1 waits forever) + :returns: A tuple containing http return code message - only returned if http return code is not equal to 200 cluster status - """ - def wait_cluster_ready(self, ns: str, name: str, wait: int = -1) -> tuple[int, str]: + """ current_wait = 0 while True: status, error, c_status = self.get_cluster_status(ns=ns, name=name) @@ -223,18 +236,17 @@ def wait_cluster_ready(self, ns: str, name: str, wait: int = -1) -> tuple[int, s time.sleep(self.wait_interval) current_wait += self.wait_interval - """ + def 
get_cluster_endpoints(self, ns: str, name: str, wait: int = -1) -> tuple[int, str, str]: + """ get cluster endpoint - Parameter: - namespace of the cluster - name of the cluster - wait time (-1 waits forever) for cluster to be ready - Returns: + :param ns: namespace of the cluster + :param name: name of the cluster + :param wait: wait time (-1 waits forever) for cluster to be ready + :returns: a tuple containing http return code message - only returned if http return code is not equal to 200 endpoint (service for dashboard endpoint) - """ - def get_cluster_endpoints(self, ns: str, name: str, wait: int = -1) -> tuple[int, str, str]: + """ # Ensure that the cluster is ready status, error = self.wait_cluster_ready(ns=ns, name=name, wait=wait) if status // 100 != 2: @@ -245,72 +257,68 @@ def get_cluster_endpoints(self, ns: str, name: str, wait: int = -1) -> tuple[int return status, error, None return status, None, f"{name}-head-svc.{ns}.svc.cluster.local:{cluster.service_endpoint['dashboard']}" - """ + def delete_cluster(self, ns: str, name: str) -> tuple[int, str]: + """ delete cluster - Parameter: - namespace of the cluster - name of the cluster - Returns: + :param ns: namespace of the cluster + :param name: name of the cluster + :return: a tuple containing http return code message - only returned if http return code is not equal to 200 - """ - def delete_cluster(self, ns: str, name: str) -> tuple[int, str]: + """ # Execute HTTP request url = self.base + self.api_base + f"namespaces/{ns}/clusters/{name}" - response = requests.delete(url, headers=_headers) + response = requests.delete(url, headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"] return response.status_code, None - """ + def submit_job(self, ns: str, name: str, job_request: RayJobRequest) -> tuple[int, str, str]: + """ submit Ray job - Parameter: - namespace of the cluster - name of the cluster - job submission - Returns: + :param 
ns: namespace of the cluster + :param name: name of the cluster + :param job_request: job submission + :return: a tuple containing http return code message - only returned if http return code is not equal to 200 submission id - """ - def submit_job(self, ns: str, name: str, jobrequest: RayJobRequest) -> tuple[int, str, str]: + """ url = self.base + self.api_base + f"namespaces/{ns}/jobsubmissions/{name}" - response = requests.post(url, json=jobrequest.to_dict(), headers=_headers, timeout=(10, 10)) + response = requests.post(url, json=job_request.to_dict(), headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None return response.status_code, None, response.json()["submissionId"] - """ + def get_job_info(self, ns: str, name: str, sid: str) -> tuple[int, str, RayJobInfo]: + """ get Ray job details - Parameter: - namespace of the cluster - name of the cluster - job submission id - Returns: + :param ns: namespace of the cluster + :param name: name of the cluster + :param sid: job submission id + return: a tuple containing http return code message - only returned if http return code is not equal to 200 RayJobInfo object - """ - def get_job_info(self, ns: str, name: str, sid: str) -> tuple[int, str, RayJobInfo]: + """ url = self.base + self.api_base + f"namespaces/{ns}/jobsubmissions/{name}/{sid}" - response = requests.get(url, headers=_headers, timeout=(10, 10)) + response = requests.get(url, headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None return response.status_code, None, RayJobInfo(response.json()) - """ + def list_job_info(self, ns: str, name: str) -> tuple[int, str, list[RayJobInfo]]: + """ list Ray job details - Parameter: - namespace of the cluster - name of the cluster - Returns: + :param ns: namespace of the cluster + :param name: name of the cluster + :return: a tuple containing http return code 
message - only returned if http return code is not equal to 200 list of RayJobInfo object - """ - def list_job_info(self, ns: str, name: str) -> tuple[int, str, list[RayJobInfo]]: + """ url = self.base + self.api_base + f"namespaces/{ns}/jobsubmissions/{name}" - response = requests.get(url, headers=_headers, timeout=(10, 10)) + response = requests.get(url, headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None infos = response.json().get("submissions", None) @@ -318,55 +326,51 @@ def list_job_info(self, ns: str, name: str) -> tuple[int, str, list[RayJobInfo]] return response.status_code, None, [] return response.status_code, None, [RayJobInfo(i) for i in infos] - """ + def get_job_log(self, ns: str, name: str, sid: str) -> tuple[int, str, str]: + """ get Ray job log - Parameter: - namespace of the cluster - name of the cluster - job submission id - Returns: + :param ns: namespace of the cluster + :param name: name of the cluster + :param sid: job submission id + return: a tuple containing http return code message - only returned if http return code is not equal to 200 log - """ - def get_job_log(self, ns: str, name: str, sid: str) -> tuple[int, str, str]: + """ url = self.base + self.api_base + f"namespaces/{ns}/jobsubmissions/{name}/log/{sid}" - response = requests.get(url, headers=_headers, timeout=(10, 10)) + response = requests.get(url, headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"], None return response.status_code, None, response.json().get("log", "") - """ + def stop_ray_job(self, ns: str, name: str, sid: str) -> tuple[int, str]: + """ stop Ray job - Parameter: - namespace of the cluster - name of the cluster - job submission id - Returns: + :param ns: namespace of the cluster + :param name: name of the cluster + :param sid: job submission id + return: a tuple containing http return code message - 
only returned if http return code is not equal to 200 - """ - def stop_ray_job(self, ns: str, name: str, sid: str) -> tuple[int, str]: + """ url = self.base + self.api_base + f"namespaces/{ns}/jobsubmissions/{name}/{sid}" - response = requests.post(url, headers=_headers, timeout=(10, 10)) + response = requests.post(url, headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"] return response.status_code, None - """ + def delete_ray_job(self, ns: str, name: str, sid: str) -> tuple[int, str]: + """ delete Ray job - Parameter: - namespace of the cluster - name of the cluster - job submission id - Returns: + :param ns: namespace of the cluster + :param name: name of the cluster + :param sid: job submission id + return: a tuple containing http return code message - only returned if http return code is not equal to 200 - """ - def delete_ray_job(self, ns: str, name: str, sid: str) -> tuple[int, str]: + """ url = self.base + self.api_base + f"namespaces/{ns}/jobsubmissions/{name}/{sid}" - response = requests.delete(url, headers=_headers, timeout=(10, 10)) + response = requests.delete(url, headers=_headers, timeout=TIMEOUT) if response.status_code // 100 != 2: return response.status_code, response.json()["message"] return response.status_code, None - diff --git a/clients/python-apiserver-client/src/python_apiserver_client/params/__init__.py b/clients/python-apiserver-client/src/python_apiserver_client/params/__init__.py new file mode 100644 index 00000000000..ae3b3973957 --- /dev/null +++ b/clients/python-apiserver-client/src/python_apiserver_client/params/__init__.py @@ -0,0 +1,53 @@ +from python_apiserver_client.params.templates import ( + TolerationOperation, + TolerationEffect, + Toleration, + Template, + toleration_decoder, + template_decoder, + templates_decoder, +) +from python_apiserver_client.params.volumes import ( + HostPath, + MountPropagationMode, + AccessMode, + BaseVolume, + 
HostPathVolume, + PVCVolume, + EphemeralVolume, + EmptyDirVolume, + ConfigMapVolume, + SecretVolume, + volume_decoder, +) +from python_apiserver_client.params.environmentvariables import ( + EnvVarSource, + EnvVarFrom, + EnvironmentVariables, + env_var_from_decoder, + environment_variables_decoder, +) +from python_apiserver_client.params.headnode import ( + ServiceType, + HeadNodeSpec, + DEFAULT_HEAD_START_PARAMS, + head_node_spec_decoder, +) +from python_apiserver_client.params.workernode import ( + WorkerNodeSpec, + DEFAULT_WORKER_START_PARAMS, + worker_node_spec_decoder, +) +from python_apiserver_client.params.cluster import ( + Environment, + AutoscalerOptions, + ClusterSpec, + ClusterEvent, + Cluster, + UpscalingMode, + autoscaling_decoder, + cluster_spec_decoder, + cluster_decoder, + clusters_decoder, +) +from python_apiserver_client.params.jobsubmission import RayJobRequest, RayJobInfo diff --git a/clients/python-apiserver-client/src/python_apiserver_client/params/cluster.py b/clients/python-apiserver-client/src/python_apiserver_client/params/cluster.py new file mode 100644 index 00000000000..6781500cc66 --- /dev/null +++ b/clients/python-apiserver-client/src/python_apiserver_client/params/cluster.py @@ -0,0 +1,463 @@ +import enum +from typing import Any + +from python_apiserver_client.params import ( + BaseVolume, + EnvironmentVariables, + HeadNodeSpec, + WorkerNodeSpec, + environment_variables_decoder, + head_node_spec_decoder, + volume_decoder, + worker_node_spec_decoder, +) + + +class Environment(enum.Enum): + """ + Environment definitions + """ + + DEV = 0 # development + TESTING = 1 # testing + STAGING = 2 # staging + PRODUCTION = 3 # production + + +class UpscalingMode(enum.Enum): + """ + Enumeration of autoscaling mode + """ + + Conservative = ( + "Conservative" # Rate-limited; the number of pending worker pods is at most the size of the Ray cluster + ) + Default = "Default" # no rate limitations + Aggressive = "Aggressive" # same as default + + 
+class AutoscalerOptions: + """ + AutoscalerOptions is used to define Ray cluster autoscaling. + It provides APIs to create, stringify and convert to dict. + + Methods: + - Create autoscaling options specification: gets the following parameters: + idle_timeout - optional, number of seconds to wait before scaling down a worker pod which is not using Ray + resources. Default 60sec (one minute). + upscaling_mode - required autoscaler upscaling mode + image - optional, allows to override the autoscaler's container image + image_pull_policy - optional, allows to override the autoscaler's container image pull policy + cpus - optional, CPUs requirements for autoscaler - default "500m" + memory - optional, memory requirements for autoscaler - default "512Mi" + environment - optional, environment variables for autoscaler container + volumes - optional, a list of volumes to attach to autoscaler container. + This is needed for enabling TLS for the autoscaler container. + """ + + def __init__( + self, + upscaling_mode: UpscalingMode = UpscalingMode.Default, + idle_tmout: int = None, + image: str = None, + image_pull_policy: str = None, + cpus: str = None, + memory: str = None, + environment: EnvironmentVariables = None, + volumes: list[BaseVolume] = None, + ): + """ + Initialization + :param upscaling_mode: upscale mode + :param idle_tmout: idle timeout + :param image: image + :param image_pull_policy: image pull policy + :param cpus: cpu requirement for autoscaling + :param memory: memory requirement for autoscaling + :param environment: autoscaler environment + :param volumes: volumes for autoscaler + """ + self.upscaling_mode = upscaling_mode + self.idle_tmout = idle_tmout + self.image = image + self.image_pull_policy = image_pull_policy + self.cpus = cpus + self.memory = memory + self.environment = environment + self.volumes = volumes + + def to_string(self) -> str: + """ + Convert to string + :return: string representation of the head node + """ + val = f"upscaling_mode = 
{self.upscaling_mode}" + if self.idle_tmout is not None: + val += f", idle_timeout = {self.idle_tmout}" + if self.image is not None: + val += f", image = {self.image}" + if self.image_pull_policy is not None: + val += f", image_pull_policy = {self.image_pull_policy}" + if self.cpus is not None: + val += f", cpus = {self.cpus}" + if self.memory is not None: + val += f", memory = {self.memory}" + if self.volumes is not None: + val = val + ",\n volumes = [" + first = True + for v in self.volumes: + if first: + first = False + else: + val += ", " + val = val + "{" + v.to_string() + "}" + val = val + "]" + if self.environment is not None: + val = val + f",\n environment = {self.environment.to_string()}" + return val + + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: dictionary representation of the head node + """ + dct = {"upscalingMode": self.upscaling_mode.value} + if self.idle_tmout is not None: + dct["idleTimeoutSeconds"] = self.idle_tmout + if self.image is not None: + dct["image"] = self.image + if self.image_pull_policy is not None: + dct["imagePullPolicy"] = self.image_pull_policy + if self.cpus is not None: + dct["cpu"] = self.cpus + if self.memory is not None: + dct["memory"] = self.memory + if self.volumes is not None: + dct["volumes"] = [v.to_dict() for v in self.volumes] + if self.environment is not None: + dct["envs"] = self.environment.to_dict() + return dct + + +class ClusterSpec: + """ + ClusterSpec is used to define Ray cluster. + It provides APIs to create, stringify, convert to dict and json. 
+ + Methods: + - Create cluster spec from: gets the following parameters: + head_node - required, specification of the head node + worker_groups - optional, list of worker group specs + autoscaling_options - optional, autoscaling options + - to_string() -> str: convert cluster spec to string for printing + - to_dict() -> dict[str, Any] convert to dict + """ + + def __init__( + self, + head_node: HeadNodeSpec, + worker_groups: list[WorkerNodeSpec] = None, + autoscaling_options: AutoscalerOptions = None, + ): + """ + Initialization + :param head_node: head node definition + :param worker_groups: worker group definitions + :param autoscaling_options: autoscaler options + """ + self.head_node = head_node + self.worker_groups = worker_groups + self.autoscaling_options = autoscaling_options + + def to_string(self) -> str: + """ + Convert to string + :return: string representation of cluster spec + """ + val = f"head_group_spec: {self.head_node.to_string()}" + if self.worker_groups is not None: + val += "\nworker groups: " + for w in self.worker_groups: + val += f"\nworker_group_spec = {w.to_string()}" + if self.autoscaling_options is not None: + val += f"\nautoscaling options = {self.autoscaling_options.to_string()}" + return val + + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: Dictionary representation of cluster spec + """ + dst = {"headGroupSpec": self.head_node.to_dict()} + if self.worker_groups is not None: + dst["workerGroupSpec"] = [w.to_dict() for w in self.worker_groups] + if self.autoscaling_options is not None: + dst["enableInTreeAutoscaling"] = True + dst["autoscalerOptions"] = self.autoscaling_options.to_dict() + return dst + + +class ClusterEvent: + """ + Cluster event is used to define events emitted during cluster creation. + It provides APIs to create and stringify.
It is output-only data, so we do not need to implement to_dict + + Methods: + - Create event: gets the dictionary with the following parameters: + id - unique Event Id + name - human readable event name + created_at - event creation time + first_timestamp - first time the event occurred + last_timestamp - last time the event occurred + reason - reason for the transition into the object's current status + message - human-readable description of the status of this operation + type - type of this event (Normal, Warning), new types could be added in the future + count - number of times this event has occurred + """ + + def __init__(self, dst: dict[str, Any]): + """ + Initialization from dictionary + :param dst: dictionary representation of cluster event + """ + self.id = dst.get("id", "") + self.name = dst.get("name", "") + self.created_at = dst.get("created_at", "") + self.first_timestamp = dst.get("first_timestamp", "") + self.last_timestamp = dst.get("last_timestamp", "") + self.reason = dst.get("reason", "") + self.message = dst.get("message", "") + self.type = dst.get("type", "") + self.count = dst.get("count", "0") + + def to_string(self) -> str: + """ + Convert to string + :return: string representation of cluster event + """ + return ( + f"id = {self.id}, name = {self.name}, created_at = {self.created_at}, " + f"first_timestamp = {self.first_timestamp}, last_timestamp = {self.last_timestamp}, " + f"reason = {self.reason}, message = {self.message}, type = {self.type}, count = {self.count}" + ) + + +class Cluster: + """ + Cluster is used to define Ray cluster. + It provides APIs to create, stringify, convert to dict and json.
+ + Methods: + - Create env variable from: gets the following parameters: + name - required, unique (per namespace) cluster name + namespace - required, cluster's namespace (should exist) + user - required, user who owns the cluster + version - required, Ray cluster version - typically Ray version + deployment_environment - optional (see Environment) + cluster_spec - required, ray cluster configuration + annotations - optional, annotations, for example, "kubernetes.io/ingress.class" to define Ingress class + cluster_environment - optional, cluster environment variables + created_at - output, cluster creation ts + deleted_at - output, cluster deletion ts + cluster_status - output, cluster status + events - output, cluster events + service_endpoint - output, cluster service endpoints + - to_string() -> str: convert toleration to string for printing + - to_dict() -> dict[str, Any] convert to dict + """ + + def __init__( + self, + name: str, + namespace: str, + user: str, + version: str, + cluster_spec: ClusterSpec, + deployment_environment: Environment = None, + annotations: dict[str, str] = None, + cluster_environment: EnvironmentVariables = None, + created_at: str = None, + deleted_at: str = None, + cluster_status: str = None, + events: list[ClusterEvent] = None, + service_endpoint: dict[str, str] = None, + ): + """ + Initialization + :param name: cluster name + :param namespace: cluster namespace + :param user: user name + :param version: version + :param cluster_spec: cluster spec + :param deployment_environment: cluster deployment environment + :param annotations: cluster annotations + :param cluster_environment: cluster environment + :param created_at: created at + :param deleted_at: deleted at + :param cluster_status: status + :param events: cluster events + :param service_endpoint: service endpoint + """ + self.name = name + self.namespace = namespace + self.user = user + self.version = version + self.cluster_spec = cluster_spec + self.environment = 
deployment_environment + self.annotations = annotations + self.envs = cluster_environment + self.created_at = created_at + self.deleted_at = deleted_at + self.cluster_status = cluster_status + self.events = events + self.service_endpoint = service_endpoint + + def to_string(self) -> str: + """ + convert to string representation + :return: string representation of cluster + """ + val = ( + f"name: {self.name}, namespace = {self.namespace}, user = {self.user}, version = {self.version} " + f"cluster_spec = {self.cluster_spec.to_string()}" + ) + if self.environment is not None: + val += f"deployment environment = {self.environment.name}" + if self.annotations is not None: + val += f" ,annotations = {str(self.annotations)}" + if self.envs is not None: + val = val + f",cluster environment = {self.envs.to_string()}" + val += "\ncluster output\n" + if self.created_at is not None: + val += f" ,created_at = {self.created_at}" + if self.deleted_at is not None: + val += f" ,deleted_at = {self.deleted_at}" + if self.cluster_status is not None: + val += f" ,cluster status = {self.cluster_status}" + if self.events is not None: + val = val + ",\n cluster events = [" + first = True + for e in self.events: + if first: + first = False + else: + val += ", " + val = val + "{" + e.to_string() + "}" + val = val + "]" + if self.service_endpoint is not None: + val += f" ,service endpoints = {str(self.service_endpoint)}" + return val + + def to_dict(self) -> dict[str, Any]: + """ + convert to dictionary + :return: dictionary representation of cluster + """ + # only convert input variables + dst = { + "name": self.name, + "namespace": self.namespace, + "user": self.user, + "version": self.version, + "clusterSpec": self.cluster_spec.to_dict(), + } + if self.environment is not None: + dst["environment"] = self.environment.value + if self.annotations is not None: + dst["annotations"] = self.annotations + if self.envs is not None: + dst["envs"] = self.envs.to_dict() + return dst + + +""" + 
Creates new cluster from dictionary, used for unmarshalling json. Python does not + support multiple constructors, so do it this way +""" + + +def autoscaling_decoder(dct: dict[str, Any]) -> AutoscalerOptions: + """ + Create autoscaling options from its dictionary representation + :param dct: dictionary representation of autoscaling options (as produced by AutoscalerOptions.to_dict) + :return: autoscaling options + """ + upscaling_mode = UpscalingMode.Default + if "upscalingMode" in dct: + upscaling_mode = UpscalingMode(dct.get("upscalingMode")) + volumes = None + if "volumes" in dct: + volumes = [volume_decoder(v) for v in dct["volumes"]] + environments = None + if dct.get("envs"): + environments = environment_variables_decoder(dct.get("envs")) + return AutoscalerOptions( + upscaling_mode=upscaling_mode, + idle_tmout=dct.get("idleTimeoutSeconds", None), + image=dct.get("image", None), + image_pull_policy=dct.get("imagePullPolicy", None), + cpus=dct.get("cpu", None), + memory=dct.get("memory", None), + environment=environments, + volumes=volumes, + ) + + +def cluster_spec_decoder(dct: dict[str, Any]) -> ClusterSpec: + """ + Create cluster spec from its dictionary representation + :param dct: dictionary representation of cluster spec + :return: cluster spec + """ + workers = None + autoscaling_options = None + if "workerGroupSpec" in dct: + workers = [worker_node_spec_decoder(w) for w in dct["workerGroupSpec"]] + if "enableInTreeAutoscaling" in dct and dct.get("enableInTreeAutoscaling"): + autoscaling_options = autoscaling_decoder(dct.get("autoscalerOptions", {})) + return ClusterSpec( + head_node=head_node_spec_decoder(dct.get("headGroupSpec")), + worker_groups=workers, + autoscaling_options=autoscaling_options, + ) + + +def cluster_decoder(dct: dict[str, Any]) -> Cluster: + """ + Create cluster from its dictionary representation + :param dct: dictionary representation of cluster + :return: cluster + """ + environment = None + if "environment" in dct: + environment =
Environment(int(dct.get("environment", "0"))) + events = None + if "events" in dct: + events = [ClusterEvent(c) for c in dct["events"]] + envs = None + if "envs" in dct: + envs = environment_variables_decoder(dct.get("envs")) + return Cluster( + name=dct.get("name", ""), + namespace=dct.get("namespace", ""), + user=dct.get("user", ""), + version=dct.get("version", ""), + cluster_spec=cluster_spec_decoder(dct.get("clusterSpec")), + deployment_environment=environment, + annotations=dct.get("annotations"), + cluster_environment=envs, + created_at=dct.get("createdAt"), + deleted_at=dct.get("deletedAt"), + cluster_status=dct.get("clusterState"), + events=events, + service_endpoint=dct.get("serviceEndpoint"), + ) + + +def clusters_decoder(dct: dict[str, Any]) -> list[Cluster]: + """ + Create list of clusters from its dictionary representation + :param dct: dictionary representation of a list of clusters + :return: list of clusters + """ + return [cluster_decoder(cluster) for cluster in dct["clusters"]] diff --git a/clients/python-apiserver-client/src/python_apiserver_client/params/environmentvariables.py b/clients/python-apiserver-client/src/python_apiserver_client/params/environmentvariables.py new file mode 100644 index 00000000000..a89610a17f6 --- /dev/null +++ b/clients/python-apiserver-client/src/python_apiserver_client/params/environmentvariables.py @@ -0,0 +1,146 @@ +import enum +from typing import Any + + +class EnvVarSource(enum.Enum): + """ + Enumeration of environment sources + """ + + CONFIGMAP = 0 # config map + SECRET = 1 # secret + RESOURCE_FIELD = 2 # resource field + FIELD = 3 # field + + +class EnvVarFrom: + """ + EnvVarFrom is used to define an environment variable from one of the sources (EnvVarSource). + It provides APIs to create, stringify, convert to dict and json.
+ + Methods: + - Create env variable from: gets the following parameters: + Source required - source of environment variable + name required name for config map or secret, container name for resource, path for field + key required Key for config map or secret, resource name for resource + - to_string() -> str: convert toleration to string for printing + - to_dict() -> dict[str, Any] convert to dict + """ + + def __init__(self, source: EnvVarSource, name: str, key: str): + """ + Initialize + :param source - source + :param name source name + :param key source key + """ + self.source = source + self.name = name + self.key = key + + def to_string(self) -> str: + """ + Convert to string + :return: string representation of environment from + """ + return f"source = {self.source.name}, name = {self.name}, key = {self.key}" + + def to_dict(self) -> dict[str, Any]: + """ + convert to dictionary + :return: dictionary representation of environment from + """ + return {"source": self.source.value, "name": self.name, "key": self.key} + + +class EnvironmentVariables: + """ + EnvironmentVariables is used to define environment variables. + It provides APIs to create, stringify, convert to dict and json. 
+ + Methods: + - Create env variable from: gets the following parameters: + key_value - optional, dictionary of key/value environment variables + from_ref - optional, dictionary of reference environment variables + - to_string() -> str: convert toleration to string for printing + - to_dict() -> dict[str, Any] convert to dict + """ + + def __init__(self, key_value: dict[str, str] = None, from_ref: dict[str, EnvVarFrom] = None): + """ + Initialization + :param key_value: dictionary of key/value pairs for environment variables + :param from_ref: dictionary of key/value pairs for environment from variables + """ + self.key_val = key_value + self.from_ref = from_ref + + def to_string(self) -> str: + """ + convert to string + :return: string representation of environment variables + """ + val = "" + if self.key_val is not None: + val = f"values = {str(self.key_val)}" + if self.from_ref is not None: + if val != "": + val += " , " + val += "valuesFrom = {" + first = True + for k, v in self.from_ref.items(): + if not first: + val += ", " + else: + first = False + val += f"{k} = [{v.to_string()}]" + val += "}" + return val + + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: dictionary representation of environment variables + """ + dst = {} + if self.key_val is not None: + dst["values"] = self.key_val + if self.from_ref is not None: + fr = {} + for k, v in self.from_ref.items(): + fr[k] = v.to_dict() + dst["valuesFrom"] = fr + return dst + + +""" + Creates new environment variable from from dictionary, used for unmarshalling json. 
Python does not + support multiple constructors, so do it this way +""" + + +def env_var_from_decoder(dct: dict[str, Any]) -> EnvVarFrom: + """ + Create environment from from dictionary + :param dct: dictionary representations of environment from + :return: environment from + """ + return EnvVarFrom(name=dct.get("name", ""), source=EnvVarSource(int(dct.get("source", 0))), key=dct.get("key", "")) + + +def environment_variables_decoder(dct: dict[str, Any]) -> EnvironmentVariables: + """ + Create environment variables from from dictionary + :param dct: dictionary representations of environment variables + :return: environment variables + """ + keyvalues = None + fr = None + if "values" in dct: + keyvalues = dct.get("values") + if "valuesFrom" in dct: + from_ref = dct.get("valuesFrom") + fr = {} + for k, v in from_ref.items(): + fr[k] = env_var_from_decoder(v) + return EnvironmentVariables(key_value=keyvalues, from_ref=fr) diff --git a/clients/python-apiserver-client/python_apiserver_client/params/headnode.py b/clients/python-apiserver-client/src/python_apiserver_client/params/headnode.py similarity index 58% rename from clients/python-apiserver-client/python_apiserver_client/params/headnode.py rename to clients/python-apiserver-client/src/python_apiserver_client/params/headnode.py index 7c99a7206c4..588d7e2765f 100644 --- a/clients/python-apiserver-client/python_apiserver_client/params/headnode.py +++ b/clients/python-apiserver-client/src/python_apiserver_client/params/headnode.py @@ -1,13 +1,24 @@ -from .volumes import * -from .environmentvariables import * +import enum +from typing import Any -DEFAULT_HEAD_START_PARAMS = {"dashboard-host": "0.0.0.0", "metrics-export-port": "8080"} +from python_apiserver_client.params import ( + BaseVolume, + EnvironmentVariables, + environment_variables_decoder, + volume_decoder, +) + +DEFAULT_HEAD_START_PARAMS = {"dashboard-host": "0.0.0.0", "metrics-export-port": "8080", "num-cpus": "0"} class ServiceType(enum.Enum): - ClusterIP = 
"ClusterIP" - NodePort = "NodePort" - LoadBalancer = "LoadBalancer" + """ + Enumeration of head node service types + """ + + ClusterIP = "ClusterIP" # cluster IP + NodePort = "NodePort" # node port + LoadBalancer = "LoadBalancer" # load balancer class HeadNodeSpec: @@ -28,12 +39,39 @@ class HeadNodeSpec: environment - optional, environment variables for head pod annotations - optional, annotations for head node labels - optional, labels for head node + image_pull_policy - optional, head node pull image policy. Default IfNotPresent """ - def __init__(self, compute_template: str, ray_start_params: dict[str, str], image: str = None, - service_type: ServiceType = None, enable_ingress: bool = False, - volumes: list[BaseVolume] = None, service_account: str = None, image_pull_secret: str = None, - environment: EnvironmentVariables = None, annotations: dict[str, str] = None, - labels: dict[str, str] = None) -> None: + + def __init__( + self, + compute_template: str, + image: str, + ray_start_params: dict[str, str] = DEFAULT_HEAD_START_PARAMS, + service_type: ServiceType = ServiceType.ClusterIP, + enable_ingress: bool = False, + volumes: list[BaseVolume] = None, + service_account: str = None, + image_pull_secret: str = None, + environment: EnvironmentVariables = None, + annotations: dict[str, str] = None, + labels: dict[str, str] = None, + image_pull_policy: str = None, + ): + """ + Initialization + :param compute_template: compute template + :param ray_start_params: ray start parameters + :param image: node image + :param service_type: service type + :param enable_ingress: enable ingress flag + :param volumes: volumes for head node + :param service_account: service account + :param image_pull_secret: image pull secret + :param environment: head node environment + :param annotations: head node annotation + :param labels: labels + :param image_pull_policy: image pull policy + """ self.compute_template = compute_template self.ray_start_params = ray_start_params @@ -47,8 
+85,13 @@ def __init__(self, compute_template: str, ray_start_params: dict[str, str], imag self.environment = environment self.annotations = annotations self.labels = labels + self.image_pull_policy = image_pull_policy def to_string(self) -> str: + """ + Convert to string + :return: string representation of the head node + """ val = f"compute template = {self.compute_template}, ray start params = {str(self.ray_start_params)}" if self.image is not None: val += f", image = {self.image}" @@ -60,6 +103,8 @@ def to_string(self) -> str: val += f", service_account = {self.service_account}" if self.image_pull_secret is not None: val += f", image_pull_secret = {self.image_pull_secret}" + if self.image_pull_policy is not None: + val += f", image_pull_policy = {self.image_pull_policy}" if self.volumes is not None: val = val + ",\n volumes = [" first = True @@ -78,7 +123,11 @@ def to_string(self) -> str: val = val + f",\n labels = {str(self.labels)}" return val - def to_dict(self) -> dict[str, any]: + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: dictionary representation of the head node + """ dct = {"computeTemplate": self.compute_template, "rayStartParams": self.ray_start_params} if self.image is not None: dct["image"] = self.image @@ -90,6 +139,8 @@ def to_dict(self) -> dict[str, any]: dct["service_account"] = self.service_account if self.image_pull_secret is not None: dct["image_pull_secret"] = self.image_pull_secret + if self.image_pull_policy is not None: + dct["imagePullPolicy"] = self.image_pull_policy if self.volumes is not None: dct["volumes"] = [v.to_dict() for v in self.volumes] if self.environment is not None: @@ -107,7 +158,12 @@ def to_dict(self) -> dict[str, any]: """ -def head_node_spec_decoder(dct: dict[str, any]) -> HeadNodeSpec: +def head_node_spec_decoder(dct: dict[str, Any]) -> HeadNodeSpec: + """ + Create head node spec from dictionary + :param dct: dictionary representation of head node spec + :return: Head node spec + 
""" service_type = None if "serviceType" in dct: service_type = ServiceType(dct.get("serviceType", "ClusterIP")) @@ -116,11 +172,18 @@ def head_node_spec_decoder(dct: dict[str, any]) -> HeadNodeSpec: volumes = [volume_decoder(v) for v in dct["volumes"]] environments = None if "environment" in dct and len(dct.get("environment")) > 0: - environments = environmentvariables_decoder(dct.get("environment")) - return HeadNodeSpec(compute_template=dct.get("computeTemplate"), ray_start_params=dct.get("rayStartParams"), - image=dct.get("image"), service_type=service_type, - enable_ingress=dct.get("enableIngress", False), - volumes=volumes, service_account=dct.get("service_account"), - image_pull_secret=dct.get("image_pull_secret"), - environment=environments, annotations=dct.get("annotations"), - labels=dct.get("labels")) + environments = environment_variables_decoder(dct.get("environment")) + return HeadNodeSpec( + compute_template=dct.get("computeTemplate"), + ray_start_params=dct.get("rayStartParams"), + image=dct.get("image"), + service_type=service_type, + enable_ingress=dct.get("enableIngress", False), + volumes=volumes, + service_account=dct.get("service_account", None), + image_pull_secret=dct.get("imagePullSecret", None), + image_pull_policy=dct.get("imagePullPolicy", None), + environment=environments, + annotations=dct.get("annotations", None), + labels=dct.get("labels", None), + ) diff --git a/clients/python-apiserver-client/python_apiserver_client/params/jobsubmission.py b/clients/python-apiserver-client/src/python_apiserver_client/params/jobsubmission.py similarity index 59% rename from clients/python-apiserver-client/python_apiserver_client/params/jobsubmission.py rename to clients/python-apiserver-client/src/python_apiserver_client/params/jobsubmission.py index f99156e839f..177f405cbcf 100644 --- a/clients/python-apiserver-client/python_apiserver_client/params/jobsubmission.py +++ 
b/clients/python-apiserver-client/src/python_apiserver_client/params/jobsubmission.py @@ -1,4 +1,5 @@ import datetime +from typing import Any class RayJobRequest: @@ -16,9 +17,27 @@ class RayJobRequest: num_gpus - optional, number of gpus for job execution resources - optional, dictionary of the resources for job execution """ - def __init__(self, entrypoint: str, submission_id: str = None, runtime_env: str = None, - metadata: dict[str, str] = None, num_cpu: float = -1., num_gpu: float = -1., - resources: dict[str, str] = None) -> None: + + def __init__( + self, + entrypoint: str, + submission_id: str = None, + runtime_env: str = None, + metadata: dict[str, str] = None, + num_cpu: float = -1.0, + num_gpu: float = -1.0, + resources: dict[str, str] = None, + ): + """ + Initialization see https://docs.ray.io/en/latest/cluster/running-applications/job-submission/api.html + :param entrypoint: entrypoint + :param submission_id: submission id + :param runtime_env: runtime environment + :param metadata: submission metadata + :param num_cpu: job number cpus + :param num_gpu: job number gpus + :param resources: job custom resources + """ self.entrypoint = entrypoint self.submission_id = submission_id self.runtime_env = runtime_env @@ -28,6 +47,10 @@ def __init__(self, entrypoint: str, submission_id: str = None, runtime_env: str self.resources = resources def to_string(self) -> str: + """ + Convert to string + :return: string representation of job submission + """ val = f"entrypoint = {self.entrypoint}" if self.submission_id is not None: val += f", submission_id = {self.submission_id}" @@ -43,7 +66,11 @@ def to_string(self) -> str: val += f", resources = {self.resources}" return val - def to_dict(self) -> dict[str, any]: + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: dictionary representation of job submission + """ dct = {"entrypoint": self.entrypoint} if self.submission_id is not None: dct["submissionId"] = self.submission_id @@ -78,29 
+105,43 @@ class RayJobInfo: error_type - type of error metadata - optional, dictionary of the submission metadata """ - def __init__(self, dst: dict[str, any]) -> None: - self.entrypoint = dst.get("entrypoint", "") - self.job_id = dst.get("jobId", "") - self.submission_id = dst.get("submissionId", "") - self.status = dst.get("status", "") - self.message = dst.get("message", None) - self.start_time = int(dst.get("startTime", "0")) - self.end_time = int(dst.get("endTime", "0")) - self.error_type = dst.get("ErrorType", None) - self.metadata = dst.get("Metadata", None) - self.runtime_env = dst.get("runtimeEnv", None) + + def __init__(self, dct: dict[str, Any]): + """ + Initialize from dictionary + :param dct: dictionary representation of Ray job info + """ + self.entrypoint = dct.get("entrypoint", "") + self.job_id = dct.get("jobId", "") + self.submission_id = dct.get("submissionId", "") + self.status = dct.get("status", "") + self.message = dct.get("message", None) + self.start_time = int(dct.get("startTime", "0")) + self.end_time = int(dct.get("endTime", "0")) + self.error_type = dct.get("ErrorType", None) + self.metadata = dct.get("Metadata", None) + self.runtime_env = dct.get("runtimeEnv", None) def to_string(self) -> str: - val = (f"entrypoint = {self.entrypoint}, job id {self.job_id}, submission id = {self.submission_id}," - f" status = {self.status}") + """ + Convert to string + :return: string representation of Ray job info + """ + val = ( + f"entrypoint = {self.entrypoint}, job id {self.job_id}, submission id = {self.submission_id}," + f" status = {self.status}" + ) if self.message is not None: val += f" message = {self.message}" if self.start_time > 0: - val += (f" start time = " - f"{datetime.datetime.fromtimestamp(self.start_time /1.e3).strftime('%Y-%m-%d %H:%M:%S')}") + val += ( + f" start time = " + f"{datetime.datetime.fromtimestamp(self.start_time /1.e3).strftime('%Y-%m-%d %H:%M:%S')}" + ) if self.end_time > 0: - val += (f" end time = " - 
f"{datetime.datetime.fromtimestamp(self.end_time / 1e3).strftime('%Y-%m-%d %H:%M:%S')}") + val += ( + f" end time = " f"{datetime.datetime.fromtimestamp(self.end_time / 1e3).strftime('%Y-%m-%d %H:%M:%S')}" + ) if self.error_type is not None: val += f" error type = {self.error_type}" if self.runtime_env is not None: diff --git a/clients/python-apiserver-client/python_apiserver_client/params/templates.py b/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py similarity index 55% rename from clients/python-apiserver-client/python_apiserver_client/params/templates.py rename to clients/python-apiserver-client/src/python_apiserver_client/params/templates.py index 937cad45289..01124913696 100644 --- a/clients/python-apiserver-client/python_apiserver_client/params/templates.py +++ b/clients/python-apiserver-client/src/python_apiserver_client/params/templates.py @@ -1,15 +1,24 @@ import enum +from typing import Any class TolerationOperation(enum.Enum): - Exists = "Exists" - Equal = "Equal" + """ + Toleration operation types + """ + + Exists = "Exists" # exists + Equal = "Equal" # equal class TolerationEffect(enum.Enum): - NoSchedule = "NoSchedule" - PreferNoSchedule = "PreferNoSchedule" - NoExecute = "NoExecute" + """ + Toleration effect + """ + + NoSchedule = "NoSchedule" # not schedule + PreferNoSchedule = "PreferNoSchedule" # prefer not schedule + NoExecute = "NoExecute" # not execute class Toleration: @@ -24,24 +33,38 @@ class Toleration: effect - required, toleration effect supported effects are "NoSchedule", "PreferNoSchedule", "NoExecute" value - optional, value - to_string() -> str: convert toleration to string for printing - - to_dict() -> dict[str, any] convert to dict + - to_dict() -> dict[str, Any] convert to dict """ - def __init__(self, key: str, operator: TolerationOperation, effect: TolerationEffect, - value: str = None) -> None: + def __init__(self, key: str, operator: TolerationOperation, effect: TolerationEffect, value: str = 
None): + """ + Initialization + :param key: key + :param operator: operator + :param effect: effect + :param value: value + """ self.key = key self.operator = operator self.value = value self.effect = effect def to_string(self) -> str: + """ + Convert to string + :return: string representation of toleration + """ val = f"key = {self.key}, operator = {self.operator.name}, effect = {self.effect.name}" if self.value is None: return val else: return val + f", value = {self.value}" - def to_dict(self) -> dict[str, any]: + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: dictionary representation of toleration + """ dct = {"key": self.key, "operator": self.operator.value, "effect": self.effect.value} if self.value is not None: dct["value"] = self.value @@ -69,27 +92,56 @@ class Template: memory - required, template memory (GB) gpus - optional, number of GPUs, default 0 gpu_accelerator - optional, if not defined nvidia.com/gpu is assumed + extended_resources - optional, name and number of the extended resources tolerations - optional, tolerations for pod placing, default none - to_string() -> str: convert toleration to string for printing - - to_dict() -> dict[str, any] convert to dict + - to_dict() -> dict[str, Any] convert to dict - to_json() -> str convert to json string """ - def __init__(self, name: str, namespace: str, cpu: int, memory: int, gpu: int = 0, - gpu_accelerator: str = None, tolerations: list[Toleration] = None) -> None: + + def __init__( + self, + name: str, + namespace: str, + cpu: int, + memory: int, + gpu: int = 0, + gpu_accelerator: str = None, + extended_resources: dict[str, int] = None, + tolerations: list[Toleration] = None, + ): + """ + Initialization + :param name: name + :param namespace: namespace + :param cpu: cpu + :param memory: memory + :param gpu: gpu + :param gpu_accelerator: accelerator type + :param extended_resources: extended resources + :param tolerations: tolerations + """ self.name = name self.namespace =
namespace self.cpu = cpu self.memory = memory self.gpu = gpu self.gpu_accelerator = gpu_accelerator + self.extended_resources = extended_resources self.tolerations = tolerations def to_string(self) -> str: + """ + Convert to string + :return: string representation of template + """ val = f"name = {self.name}, namespace = {self.namespace}, cpu = {self.cpu}, memory = {self.memory}" if self.gpu > 0: val = val + f", gpu {self.gpu}" if self.gpu_accelerator is not None: val = val + f", gpu accelerator {self.gpu_accelerator}" + if self.extended_resources is not None: + val = val + f", extended resources {self.extended_resources}" if self.tolerations is None: return val val = val + ", tolerations [" @@ -102,12 +154,18 @@ def to_string(self) -> str: val = val + ", {" + tol.to_string() + "}" return val + "]" - def to_dict(self) -> dict[str, any]: + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: dictionary representation of template + """ dct = {"name": self.name, "namespace": self.namespace, "cpu": self.cpu, "memory": self.memory} if self.gpu > 0: dct["gpu"] = self.gpu if self.gpu_accelerator is not None: dct["gpu accelerator"] = self.gpu_accelerator + if self.extended_resources is not None: + dct["extended resources"] = self.extended_resources if self.tolerations is not None: dct["tolerations"] = [tl.to_dict() for tl in self.tolerations] return dct @@ -119,19 +177,45 @@ def to_dict(self) -> dict[str, any]: """ -def toleration_decoder(dct: dict[str, any]) -> Toleration: - return Toleration(key=dct.get("key"), operator=TolerationOperation(dct.get("operator", "Exists")), - effect=TolerationEffect(dct.get("effect", "NoSchedule")), value=dct.get("value")) +def toleration_decoder(dct: dict[str, Any]) -> Toleration: + """ + Create toleration from dictionary + :param dct: dictionary representation of toleration + :return: toleration + """ + return Toleration( + key=dct.get("key"), + operator=TolerationOperation(dct.get("operator", "Exists")), + 
effect=TolerationEffect(dct.get("effect", "NoSchedule")), + value=dct.get("value"), + ) -def template_decoder(dct: dict[str, any]) -> Template: +def template_decoder(dct: dict[str, Any]) -> Template: + """ + Create template from dictionary (also accepts the "gpu accelerator" / "extended resources" keys written by Template.to_dict) + :param dct: dictionary representation of template + :return: template + """ tolerations = None if "tolerations" in dct: tolerations = [toleration_decoder(d) for d in dct["tolerations"]] - return Template(name=dct.get("name"), namespace=dct.get("namespace"), cpu=int(dct.get("cpu", "0")), - memory=int(dct.get("memory", "0")), gpu=int(dct.get("gpu", "0")), - gpu_accelerator=dct.get("gpu_accelerator"), tolerations=tolerations) - - -def templates_decoder(dct: dict[str, any]) -> list[Template]: + return Template( + name=dct.get("name"), + namespace=dct.get("namespace"), + cpu=int(dct.get("cpu", "0")), + memory=int(dct.get("memory", "0")), + gpu=int(dct.get("gpu", "0")), + gpu_accelerator=dct.get("gpu_accelerator", dct.get("gpu accelerator")), + extended_resources=dct.get("extended_resources", dct.get("extended resources")), + tolerations=tolerations, + ) + + +def templates_decoder(dct: dict[str, Any]) -> list[Template]: + """ + Create list of template from dictionary + :param dct: dictionary representation of list of template + :return: list of template + """ return [template_decoder(tmp) for tmp in dct["computeTemplates"]] diff --git a/clients/python-apiserver-client/src/python_apiserver_client/params/volumes.py b/clients/python-apiserver-client/src/python_apiserver_client/params/volumes.py new file mode 100644 index 00000000000..adaaa360b79 --- /dev/null +++ b/clients/python-apiserver-client/src/python_apiserver_client/params/volumes.py @@ -0,0 +1,437 @@ +import enum +from typing import Any + + +class HostPath(enum.Enum): + """ + Host path enumeration + """ + + DIRECTORY = 0 # directory + FILE = 1 # files + + +class MountPropagationMode(enum.Enum): + """ + Mount propagation enumeration + """ + + NONE = 0 # None + HOSTTOCONTAINER = 1 # host to container + BIDIRECTIONAL = 2 # bi directional 
+ + +class AccessMode(enum.Enum): + """ + Access mode enumeration + """ + + RWO = 0 # read write once + ROX = 1 # read only many + RWX = 2 # read write many + + +class BaseVolume: + """ + KubeRay currently supports several types of volumes, including hostPath, PVC, + ephemeral volumes, config maps, secrets and empty dir. All of them use slightly + different parameters. Base Volume is a base class for all different volume types. + """ + + def to_string(self) -> str: + """ + Convert to string + :return: string representation of base volume + """ + raise Exception(f"Base volume cannot be used directly. Pls use one of the derived classes") + + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: dictionary representation of base volume + """ + raise Exception(f"Base volume cannot be used directly. Pls use one of the derived classes") + + +class HostPathVolume(BaseVolume): + """ + This class implements HostPath volume. In addition to name and mount path it requires host + path volume specific parameters: + source - data location on host + hostPathType - host path type: directory (0) or file (1) + mountPropagationMode - mount propagation: None (0), host to container (1) or bidirectional (2) + + """ + + def __init__( + self, + name: str, + mount_path: str, + source: str, + host_path_type: HostPath = None, + mount_propagation: MountPropagationMode = None, + ): + """ + Initialization + :param name: name + :param mount_path: mount path + :param source: source + :param host_path_type: host path type + :param mount_propagation: mount propagation + """ + self.name = name + self.mount_path = mount_path + self.source = source + self.host_path_type = host_path_type + self.volume_type = 1 + self.mount_propagation = mount_propagation + + def to_string(self) -> str: + """ + Convert to string + :return: HostPathVolume string representation + """ + val = f"name = {self.name}, mount_path = {self.mount_path}, source = {self.source}, " f"volume type = hostPath" + if 
self.mount_propagation is not None: + val += f", mount propagation = {self.mount_propagation.name}" + if self.host_path_type is not None: + val += f", host path type = {self.host_path_type.name}" + return val + + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: HostPathVolume dictionary representation + """ + dst = {"name": self.name, "mountPath": self.mount_path, "source": self.source, "volumeType": self.volume_type} + if self.mount_propagation is not None: + dst["mountPropagationMode"] = self.mount_propagation.value + if self.host_path_type is not None: + dst["hostPathType"] = self.host_path_type.value + return dst + + +class PVCVolume(BaseVolume): + """ + This class implements PVC volume. In addition to name and mount path it requires + PVC volume specific parameters: + source - PVC claim name + read_only - read only flag + mountPropagationMode - mount propagation: None (0), host to container (1) or bidirectional (2) + """ + + def __init__( + self, + name: str, + mount_path: str, + source: str, + read_only: bool = False, + mount_propagation: MountPropagationMode = None, + ): + """ + Initialization + :param name: name + :param mount_path: mount path + :param source: source + :param read_only: read only + :param mount_propagation: mount propagation + """ + self.name = name + self.mount_path = mount_path + self.source = source + self.volume_type = 0 + self.mount_propagation = mount_propagation + self.readonly = read_only + + def to_string(self) -> str: + """ + Convert to string + :return: PVCVolume string representation + """ + val = f"name = {self.name}, mount_path = {self.mount_path}, source = {self.source}, " f"volume type = PVC" + if self.readonly: + val += ", read only = True" + if self.mount_propagation is not None: + val += f", mount propagation = {self.mount_propagation.name}" + return val + + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: PVCVolume dictionary representation + """ + dst = {"name": 
self.name, "mountPath": self.mount_path, "source": self.source, "volumeType": self.volume_type} + if self.readonly: + dst["readOnly"] = True + if self.mount_propagation is not None: + dst["mountPropagationMode"] = self.mount_propagation.value + return dst + + +class EphemeralVolume(BaseVolume): + """ + This class implements Ephemeral volume. In addition to name and mount path it requires + Ephemeral volume specific parameters: + storage - disk size (valid k8 value, for example 5Gi) + storageClass - storage class - optional, if not specified, use default + accessMode - access mode RWO - optional ReadWriteOnce (0), ReadOnlyMany (1), ReadWriteMany (2) + mountPropagationMode - optional mount propagation: None (0), host to container (1) or bidirectional (2) + """ + + def __init__( + self, + name: str, + mount_path: str, + storage: str, + storage_class: str = None, + access_mode: AccessMode = None, + mount_propagation: MountPropagationMode = None, + ): + """ + Initialization + :param name: name + :param mount_path: mount path + :param storage: storage + :param storage_class: storage class + :param access_mode: access mode + :param mount_propagation: mount propagation + """ + self.name = name + self.mount_path = mount_path + self.storage = storage + self.volume_type = 2 + self.mount_propagation = mount_propagation + self.storage_class = storage_class + self.access_mode = access_mode + + def to_string(self) -> str: + """ + Convert to string + :return: EphemeralVolume string representation + """ + val = ( + f"name = {self.name}, mount_path = {self.mount_path}, storage = {self.storage}, " f"volume type = ephemeral" + ) + if self.storage_class is not None: + val += f", storage class = {self.storage_class}" + if self.access_mode is not None: + val += f", access mode = {self.access_mode.name}" + if self.mount_propagation is not None: + val += f", mount propagation = {self.mount_propagation.name}" + return val + + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary 
+ :return: EphemeralVolume dictionary representation + """ + dct = { + "name": self.name, + "mountPath": self.mount_path, + "storage": self.storage, + "volumeType": self.volume_type, + } + if self.storage_class is not None: + dct["storageClassName"] = self.storage_class + if self.access_mode is not None: + dct["accessMode"] = self.access_mode.value + if self.mount_propagation is not None: + dct["mountPropagationMode"] = self.mount_propagation.value + return dct + + +class EmptyDirVolume(BaseVolume): + """ + This class implements EmptyDir volume. In addition to name and mount path it requires + Empty Dir specific parameters: + storage - optional max storage size (valid k8 value, for example 5Gi) + """ + + def __init__(self, name: str, mount_path: str, storage: str = None): + """ + Initialization + :param name: name + :param mount_path: mount_path + :param storage: storage + """ + self.name = name + self.mount_path = mount_path + self.storage = storage + self.volume_type = 5 + + def to_string(self) -> str: + """ + Convert to string + :return: EmptyDirVolume string representation + """ + val = f"name = {self.name}, mount_path = {self.mount_path}, volume type = emptyDir" + if self.storage is not None: + val += f", storage = {self.storage}" + return val + + def to_dict(self) -> dict[str, Any]: + dct = {"name": self.name, "mountPath": self.mount_path, "volumeType": self.volume_type} + if self.storage is not None: + dct["storage"] = self.storage + return dct + + +class ConfigMapVolume(BaseVolume): + """ + This class implements ConfigMap volume. 
In addition to name and mount path it requires + configMap volume specific parameters: + source - required, config map name + items - optional, key/path items (optional) + """ + + def __init__( + self, + name: str, + mount_path: str, + source: str, + items: dict[str, str] = None, + ): + """ + Initialization + :param name: name + :param mount_path: mount path + :param source: source + :param items: items + """ + self.name = name + self.mount_path = mount_path + self.source = source + self.items = items + self.volume_type = 3 + + def to_string(self) -> str: + """ + Convert to string + :return: ConfigMapVolume string representation + """ + val = ( + f"name = {self.name}, mount_path = {self.mount_path}, source = {self.source}, " f"volume type = configmap" + ) + if self.items is not None: + val = val + f", items = {str(self.items)}" + return val + + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: ConfigMapVolume dictionary representation + """ + dct = {"name": self.name, "mountPath": self.mount_path, "source": self.source, "volumeType": self.volume_type} + if self.items is not None: + dct["items"] = self.items + return dct + + +class SecretVolume(BaseVolume): + """ + This class implements Secret volume. 
In addition to name and mount path it requires + Secret volume specific parameters: + source - required, secret name + items - optional, key/path items (optional) + """ + + def __init__( + self, + name: str, + mount_path: str, + source: str, + items: dict[str, str] = None, + ): + self.name = name + self.mount_path = mount_path + self.source = source + self.items = items + self.volume_type = 4 + + def to_string(self) -> str: + val = f"name = {self.name}, mount_path = {self.mount_path}, source = {self.source}, " f"volume type = secret" + if self.items is not None: + val = val + f", items = {str(self.items)}" + return val + + def to_dict(self) -> dict[str, Any]: + dct = {"name": self.name, "mountPath": self.mount_path, "source": self.source, "volumeType": self.volume_type} + if self.items is not None: + dct["items"] = self.items + return dct + + +""" + Creates new Volume from dictionary, used for unmarshalling json. Python does not + support multiple constructors, so do it this way +""" + + +def volume_decoder(dst: dict[str, Any]) -> BaseVolume: + def _get_mount_propagation() -> MountPropagationMode: + if "mountPropagationMode" in dst: + return MountPropagationMode(int(dst.get("mountPropagationMode", "0"))) + return None + + def _get_host_path() -> HostPath: + if "hostPathType" in dst: + return HostPath(int(dst.get("hostPathType", "0"))) + return None + + def _get_access_mode() -> AccessMode: + if "accessMode" in dst: + return AccessMode(int(dst.get("accessMode", "0"))) + return None + + match dst["volumeType"]: + case 0: + # PVC + return PVCVolume( + name=dst.get("name", ""), + mount_path=dst.get("mountPath", ""), + source=dst.get("source", ""), + read_only=dst.get("readOnly", False), + mount_propagation=_get_mount_propagation(), + ) + case 1: + # host path + return HostPathVolume( + name=dst.get("name", ""), + mount_path=dst.get("mountPath", ""), + source=dst.get("source", ""), + host_path_type=_get_host_path(), + mount_propagation=_get_mount_propagation(), + ) + 
case 2: + # Ephemeral volume + return EphemeralVolume( + name=dst.get("name", ""), + mount_path=dst.get("mountPath", ""), + storage=dst.get("storage", ""), + storage_class=dst.get("storageClassName"), + access_mode=_get_access_mode(), + mount_propagation=_get_mount_propagation(), + ) + case 3: + # ConfigMap Volume + return ConfigMapVolume( + name=dst.get("name", ""), + mount_path=dst.get("mountPath", ""), + source=dst.get("source", ""), + items=dst.get("items"), + ) + case 4: + # Secret Volume + return SecretVolume( + name=dst.get("name", ""), + mount_path=dst.get("mountPath", ""), + source=dst.get("source", ""), + items=dst.get("items"), + ) + case 5: + # Empty dir volume + return EmptyDirVolume( + name=dst.get("name", ""), mount_path=dst.get("mountPath", ""), storage=dst.get("storage") + ) + case _: + raise Exception(f"Unknown volume type in {dst}") diff --git a/clients/python-apiserver-client/src/python_apiserver_client/params/workernode.py b/clients/python-apiserver-client/src/python_apiserver_client/params/workernode.py new file mode 100644 index 00000000000..3f45597f946 --- /dev/null +++ b/clients/python-apiserver-client/src/python_apiserver_client/params/workernode.py @@ -0,0 +1,194 @@ +from typing import Any + +from python_apiserver_client.params import ( + BaseVolume, + EnvironmentVariables, + environment_variables_decoder, + volume_decoder, +) + + +DEFAULT_WORKER_START_PARAMS = {"node-ip-address": "$MY_POD_IP"} + + +class WorkerNodeSpec: + """ + WorkerNodeSpec is used to define Ray cluster worker node pool configuration. + It provides APIs to create, stringify and convert to dict. 
+ + Methods: + - Create worker node pool specification: gets the following parameters: + group_name - required, group name of the worker group + compute_template - required, the computeTemplate of worker node group + replicas - required, desired replicas of the worker group + min_replicas - required Min replicas of the worker group, can't be greater than max_replicas + max_replicas - required, max replicas of the worker group + ray_start_params - required, Ray start parameters + image - optional, image used for worker node + volumes - optional, a list of volumes to attach to worker node + service_account - optional, a service account (has to exist) to run worker node + image_pull_secret - optional, secret to pull worker node image from registry + environment - optional, environment variables for worker pod + annotations - optional, annotations for worker node + labels - optional, labels for worker node + image_pull_policy - optional, worker node pull image policy. Default IfNotPresent + """ + + def __init__( + self, + group_name: str, + compute_template: str, + image: str, + max_replicas: int, + replicas: int = 1, + min_replicas: int = 0, + ray_start_params: dict[str, str] = DEFAULT_WORKER_START_PARAMS, + volumes: list[BaseVolume] = None, + service_account: str = None, + image_pull_secret: str = None, + environment: EnvironmentVariables = None, + annotations: dict[str, str] = None, + labels: dict[str, str] = None, + image_pull_policy: str = None, + ): + """ + Initialization + :param group_name: name + :param compute_template: compute template + :param replicas: number of replicas + :param min_replicas: min number of replicas + :param max_replicas: max number of replicas + :param ray_start_params: ray start parameters + :param image: image name + :param volumes: volumes + :param service_account: service account + :param image_pull_secret: image pull secret + :param environment: environment + :param annotations: annotations + :param labels: labels + :param 
image_pull_policy: image pull policy + """ + # Validate replicas + if min_replicas > replicas: + raise RuntimeError(f"min_replicas {min_replicas} is can't be greater then replicas {replicas} ") + if replicas > max_replicas: + raise RuntimeError(f"replicas {replicas} is can't be greater then max_replicas {max_replicas} ") + + self.group_name = group_name + self.compute_template = compute_template + self.replicas = replicas + self.min_replicas = min_replicas + self.max_replicas = max_replicas + self.ray_start_params = dict(ray_start_params or {}) # copy: never mutate the caller's dict or the shared default; tolerate None + self.ray_start_params.update(DEFAULT_WORKER_START_PARAMS) + self.image = image + self.volumes = volumes + self.service_account = service_account + self.image_pull_secret = image_pull_secret + self.environment = environment + self.annotations = annotations + self.labels = labels + self.image_pull_policy = image_pull_policy + + def to_string(self) -> str: + """ + Convert to string + :return: string representation of worker node spec + """ + val = ( + f"group_name = {self.group_name}, compute template = {self.compute_template}, " + f"replicas = {self.replicas}, min_replicas = {self.min_replicas}, " + f"max_replicas = {self.max_replicas}, ray start params = {str(self.ray_start_params)}" + ) + if self.image is not None: + val += f", image = {self.image}" + if self.service_account is not None: + val += f", service_account = {self.service_account}" + if self.image_pull_secret is not None: + val += f", image_pull_secret = {self.image_pull_secret}" + if self.image_pull_policy is not None: + val += f", image_pull_policy = {self.image_pull_policy}" + if self.volumes is not None: + val = val + ",\n volumes = [" + first = True + for v in self.volumes: + if first: + first = False + else: + val += ", " + val = val + "{" + v.to_string() + "}" + val = val + "]" + if self.environment is not None: + val = val + f",\n environment = {self.environment.to_string()}" + if self.annotations is not None: + val = val + f",\n annotations = 
{str(self.annotations)}" + if self.labels is not None: + val = val + f",\n labels = {str(self.labels)}" + return val + + def to_dict(self) -> dict[str, Any]: + """ + Convert to dictionary + :return: dictionary representation of worker node spec + """ + dct = { + "groupName": self.group_name, + "computeTemplate": self.compute_template, + "replicas": self.replicas, + "minReplicas": self.min_replicas, + "maxReplicas": self.max_replicas, + "rayStartParams": self.ray_start_params, + } + if self.image is not None: + dct["image"] = self.image + if self.service_account is not None: + dct["service_account"] = self.service_account + if self.image_pull_secret is not None: + dct["imagePullSecret"] = self.image_pull_secret + if self.image_pull_policy is not None: + dct["imagePullPolicy"] = self.image_pull_policy + if self.volumes is not None: + dct["volumes"] = [v.to_dict() for v in self.volumes] + if self.environment is not None: + dct["environment"] = self.environment.to_dict() + if self.annotations is not None: + dct["annotations"] = self.annotations + if self.labels is not None: + dct["labels"] = self.labels + return dct + + +""" + Creates new worker node from dictionary, used for unmarshalling json. 
Python does not + support multiple constructors, so do it this way +""" + + +def worker_node_spec_decoder(dct: dict[str, Any]) -> WorkerNodeSpec: + """ + Create worker node spec from dictionary + :param dct: dictionary definition of worker node spec + :return: worker node spec + """ + volumes = None + if "volumes" in dct: + volumes = [volume_decoder(v) for v in dct["volumes"]] + environments = None + if "environment" in dct and len(dct.get("environment")) > 0: + environments = environment_variables_decoder(dct.get("environment")) + return WorkerNodeSpec( + group_name=dct.get("groupName"), + compute_template=dct.get("computeTemplate"), + replicas=dct.get("replicas", 0), + min_replicas=dct.get("minReplicas", 0), + max_replicas=dct.get("maxReplicas", 0), + ray_start_params=dct.get("rayStartParams"), + image=dct.get("image"), + volumes=volumes, + service_account=dct.get("service_account", None), + image_pull_secret=dct.get("imagePullSecret", None), + image_pull_policy=dct.get("imagePullPolicy", None), + environment=environments, + annotations=dct.get("annotations", None), + labels=dct.get("labels", None), + ) diff --git a/clients/python-apiserver-client/test/api_params_test.py b/clients/python-apiserver-client/test/api_params_test.py new file mode 100644 index 00000000000..37b0e3c45d1 --- /dev/null +++ b/clients/python-apiserver-client/test/api_params_test.py @@ -0,0 +1,427 @@ +import json + +from python_apiserver_client.params import ( + DEFAULT_HEAD_START_PARAMS, + DEFAULT_WORKER_START_PARAMS, + AccessMode, + AutoscalerOptions, + Cluster, + ClusterEvent, + ClusterSpec, + ConfigMapVolume, + EmptyDirVolume, + Environment, + EnvironmentVariables, + EnvVarFrom, + EnvVarSource, + EphemeralVolume, + HeadNodeSpec, + HostPath, + HostPathVolume, + MountPropagationMode, + PVCVolume, + RayJobInfo, + RayJobRequest, + SecretVolume, + ServiceType, + Template, + Toleration, + TolerationEffect, + TolerationOperation, + WorkerNodeSpec, + autoscaling_decoder, + cluster_decoder, + 
cluster_spec_decoder, + env_var_from_decoder, + environment_variables_decoder, + head_node_spec_decoder, + template_decoder, + toleration_decoder, + volume_decoder, + worker_node_spec_decoder, +) + + +def test_toleration(): + + tol1 = Toleration(key="blah1", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute) + print(f"\ntoleration 1: {tol1.to_string()}") + t1_json = json.dumps(tol1.to_dict()) + print(f"toleration 1 JSON: {t1_json}") + + tol2 = Toleration( + key="blah2", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute, value="value" + ) + print(f"toleration 2: {tol2.to_string()}") + t2_json = json.dumps(tol2.to_dict()) + print(f"toleration 2 JSON: {t2_json}") + + assert tol1.to_string() == toleration_decoder(json.loads(t1_json)).to_string() + assert tol2.to_string() == toleration_decoder(json.loads(t2_json)).to_string() + + +def test_templates(): + + tol1 = Toleration(key="blah1", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute) + tol2 = Toleration( + key="blah2", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute, value="value" + ) + + temp1 = Template(name="template1", namespace="namespace", cpu=1, memory=4, tolerations=[tol1, tol2]) + print(f"\ntemplate 1: {temp1.to_string()}") + tm1_json = json.dumps(temp1.to_dict()) + print(f"template 1 JSON: {tm1_json}") + + temp2 = Template(name="template2", namespace="namespace", cpu=2, memory=8, gpu=1) + print(f"template 2: {temp2.to_string()}") + tm2_json = json.dumps(temp2.to_dict()) + print(f"template 2 JSON: {tm2_json}") + + temp3 = Template(name="template3", namespace="namespace", cpu=2, memory=8, gpu=1, extended_resources={"vpc.amazonaws.com/efa": 32}) + print(f"template 3: {temp3.to_string()}") + tm3_json = json.dumps(temp3.to_dict()) + print(f"template 3 JSON: {tm3_json}") + + assert temp1.to_string() == template_decoder(json.loads(tm1_json)).to_string() + assert temp2.to_string() == 
template_decoder(json.loads(tm2_json)).to_string() + assert temp3.to_string() == template_decoder(json.loads(tm3_json)).to_string() + + +def test_volumes(): + + # hostPath + vol = HostPathVolume( + name="hostPath", + mount_path="tmp/hostPath", + source="source", + host_path_type=HostPath.FILE, + mount_propagation=MountPropagationMode.NONE, + ) + print(f"\nhostPath volume: {vol.to_string()}") + vol_json = json.dumps(vol.to_dict()) + print(f"host path volume json: {vol_json}") + assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() + + vol = PVCVolume( + name="pvc", + mount_path="tmp/pvc", + source="claim", + read_only=True, + mount_propagation=MountPropagationMode.BIDIRECTIONAL, + ) + print(f"PVC volume: {vol.to_string()}") + vol_json = json.dumps(vol.to_dict()) + print(f"PVC volume json: {vol_json}") + assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() + + vol = EphemeralVolume( + name="ephemeral", mount_path="tmp/ephemeral", storage="5Gi", storage_class="blah", access_mode=AccessMode.RWX + ) + print(f"Ephemeral volume: {vol.to_string()}") + vol_json = json.dumps(vol.to_dict()) + print(f"Ephemeral volume json: {vol_json}") + assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() + + vol = EmptyDirVolume(name="emptyDir", mount_path="tmp/emptyDir") + print(f"Empty dir volume: {vol.to_string()}") + vol_json = json.dumps(vol.to_dict()) + print(f"Empty dir volume json: {vol_json}") + assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() + + vol = ConfigMapVolume( + name="confmap", mount_path="tmp/confmap", source="my-map", items={"sample_code.py": "sample_code.py"} + ) + print(f"config map volume: {vol.to_string()}") + vol_json = json.dumps(vol.to_dict()) + print(f"config map volume json: {vol_json}") + assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() + + vol = SecretVolume(name="secret", mount_path="tmp/secret", source="my-secret") + print(f"secret volume: 
{vol.to_string()}") + vol_json = json.dumps(vol.to_dict()) + print(f"secret volume json: {vol_json}") + assert volume_decoder(json.loads(vol_json)).to_string() == vol.to_string() + + +def test_environment(): + + env_v = EnvVarFrom(source=EnvVarSource.SECRET, name="my-secret", key="key") + print(f"\nEnv variable from: {env_v.to_string()}") + env_v_json = json.dumps(env_v.to_dict()) + print(f"Env variable from JSON: {env_v_json}") + assert env_var_from_decoder(json.loads(env_v_json)).to_string() == env_v.to_string() + + envs = EnvironmentVariables(key_value={"key": "val"}, from_ref={"key_ref": env_v}) + print(f"Env variables: {envs.to_string()}") + envs_json = json.dumps(envs.to_dict()) + print(f"Env variables JSON: {envs_json}") + assert environment_variables_decoder(json.loads(envs_json)).to_string() == envs.to_string() + + envs = EnvironmentVariables(from_ref={"key_ref": env_v}) + print(f"Env variables: {envs.to_string()}") + envs_json = json.dumps(envs.to_dict()) + print(f"Env variables JSON: {envs_json}") + assert environment_variables_decoder(json.loads(envs_json)).to_string() == envs.to_string() + + envs = EnvironmentVariables(key_value={"key": "val"}) + print(f"Env variables: {envs.to_string()}") + envs_json = json.dumps(envs.to_dict()) + print(f"Env variables JSON: {envs_json}") + assert environment_variables_decoder(json.loads(envs_json)).to_string() == envs.to_string() + + +def test_head_node_spec(): + + env_v = EnvVarFrom(source=EnvVarSource.SECRET, name="my-secret", key="key") + env_s = EnvironmentVariables(key_value={"key": "val"}, from_ref={"key_ref": env_v}) + volumes = [ + PVCVolume( + name="pvc", + mount_path="tmp/pvc", + source="claim", + read_only=True, + mount_propagation=MountPropagationMode.BIDIRECTIONAL, + ), + EmptyDirVolume(name="emptyDir", mount_path="tmp/emptyDir"), + ] + + head = HeadNodeSpec( + compute_template="template", + image="rayproject/ray:2.9.0-py310", + ray_start_params=DEFAULT_HEAD_START_PARAMS, + enable_ingress=True, + 
service_type=ServiceType.ClusterIP, + volumes=volumes, + environment=env_s, + image_pull_policy="Always", + ) + print(f"\nhead node: {head.to_string()}") + head_json = json.dumps(head.to_dict()) + print(f"head node JSON: {head_json}") + assert head_node_spec_decoder(json.loads(head_json)).to_string() == head.to_string() + + +def test_worker_node_spec(): + + env_v = EnvVarFrom(source=EnvVarSource.SECRET, name="my-secret", key="key") + env_s = EnvironmentVariables(key_value={"key": "val"}, from_ref={"key_ref": env_v}) + volumes = [ + PVCVolume( + name="pvc", + mount_path="tmp/pvc", + source="claim", + read_only=True, + mount_propagation=MountPropagationMode.BIDIRECTIONAL, + ), + EmptyDirVolume(name="emptyDir", mount_path="tmp/emptyDir"), + ] + + worker = WorkerNodeSpec( + group_name="group", + compute_template="template", + image="rayproject/ray:2.9.0-py310", + replicas=2, + min_replicas=2, + max_replicas=2, + volumes=volumes, + ray_start_params=DEFAULT_WORKER_START_PARAMS, + environment=env_s, + labels={"key": "value"}, + image_pull_policy="IfNotPresent", + ) + print(f"\nworker node: {worker.to_string()}") + worker_json = json.dumps(worker.to_dict()) + print(f"worker node JSON: {worker_json}") + assert worker_node_spec_decoder(json.loads(worker_json)).to_string() == worker.to_string() + + +def test_autoscaler_options(): + options = AutoscalerOptions() + print(f"\nautoscaler options: {options.to_string()}") + options_json = json.dumps(options.to_dict()) + print(f"autoscaler options JSON: {options_json}") + assert autoscaling_decoder(json.loads(options_json)).to_string() == options.to_string() + + options = AutoscalerOptions(cpus="1.0", memory="64GB") + print(f"\nautoscaler options: {options.to_string()}") + options_json = json.dumps(options.to_dict()) + print(f"autoscaler options JSON: {options_json}") + assert autoscaling_decoder(json.loads(options_json)).to_string() == options.to_string() + + +def test_cluster_spec(): + env_s = EnvironmentVariables( + 
key_value={"key": "val"}, + from_ref={"key_ref": EnvVarFrom(source=EnvVarSource.SECRET, name="my-secret", key="key")}, + ) + volumes = [ + PVCVolume( + name="pvc", + mount_path="tmp/pvc", + source="claim", + read_only=True, + mount_propagation=MountPropagationMode.BIDIRECTIONAL, + ), + EmptyDirVolume(name="emptyDir", mount_path="tmp/emptyDir"), + ] + spec = ClusterSpec( + head_node=HeadNodeSpec( + compute_template="template", + image="rayproject/ray:2.9.0-py310", + ray_start_params=DEFAULT_HEAD_START_PARAMS, + volumes=volumes, + enable_ingress=True, + service_type=ServiceType.ClusterIP, + environment=env_s, + ), + worker_groups=[ + WorkerNodeSpec( + group_name="group", + compute_template="template", + replicas=2, + min_replicas=2, + max_replicas=2, + image="rayproject/ray:2.9.0-py310", + ray_start_params=DEFAULT_WORKER_START_PARAMS, + volumes=volumes, + environment=env_s, + labels={"key": "value"}, + ), + WorkerNodeSpec( + group_name="group1", + compute_template="template1", + replicas=2, + min_replicas=2, + max_replicas=2, + image="rayproject/ray:2.9.0-py310", + ray_start_params=DEFAULT_WORKER_START_PARAMS, + volumes=volumes, + environment=env_s, + labels={"key": "value"}, + ), + ], + autoscaling_options=AutoscalerOptions(), + ) + print(f"\ncluster spec: {spec.to_string()}") + spec_json = json.dumps(spec.to_dict()) + print(f"cluster spec JSON: {spec_json}") + assert cluster_spec_decoder(json.loads(spec_json)).to_string() == spec.to_string() + + +def test_cluster(): + + event = { + "id": "id", + "name": "name", + "created_at": "ts", + "first_timestamp": "ts", + "last_timestamp": "ts", + "reason": "reason", + "message": "message", + "type": "warning", + "count": "1", + } + print(f"\ncluster event: {ClusterEvent(event).to_string()}") + env_s = EnvironmentVariables( + key_value={"key": "val"}, + from_ref={"key_ref": EnvVarFrom(source=EnvVarSource.SECRET, name="my-secret", key="key")}, + ) + volumes = [ + PVCVolume( + name="pvc", + mount_path="tmp/pvc", + 
source="claim", + read_only=True, + mount_propagation=MountPropagationMode.BIDIRECTIONAL, + ), + EmptyDirVolume(name="emptyDir", mount_path="tmp/emptyDir"), + ] + spec = ClusterSpec( + head_node=HeadNodeSpec( + compute_template="template", + ray_start_params=DEFAULT_HEAD_START_PARAMS, + enable_ingress=True, + service_type=ServiceType.ClusterIP, + volumes=volumes, + environment=env_s, + annotations={"a_key": "a_val"}, + image="rayproject/ray:2.9.0-py310", + ), + worker_groups=[ + WorkerNodeSpec( + group_name="group", + compute_template="template", + replicas=2, + min_replicas=2, + max_replicas=2, + image="rayproject/ray:2.9.0-py310", + ray_start_params=DEFAULT_WORKER_START_PARAMS, + volumes=volumes, + environment=env_s, + labels={"key": "value"}, + ), + WorkerNodeSpec( + group_name="group1", + compute_template="template1", + replicas=2, + min_replicas=2, + max_replicas=2, + image="rayproject/ray:2.9.0-py310", + ray_start_params=DEFAULT_WORKER_START_PARAMS, + volumes=volumes, + environment=env_s, + labels={"key": "value"}, + ), + ], + ) + cluster = Cluster( + name="test", + namespace="default", + user="boris", + version="2.9.0", + cluster_spec=spec, + deployment_environment=Environment.DEV, + cluster_environment=env_s, + ) + print(f"cluster: {cluster.to_string()}") + cluster_json = json.dumps(cluster.to_dict()) + print(f"cluster JSON: {cluster_json}") + assert cluster_decoder(json.loads(cluster_json)).to_string() == cluster.to_string() + + cluster_dict = cluster.to_dict() + cluster_dict["created_at"] = "created" + cluster_dict["created_status"] = "status" + cluster_dict["events"] = [event] + print(f"cluster with output: {cluster_decoder(cluster_dict).to_string()}") + + +def test_submission(): + yaml = """ + pip: + - requests==2.26.0 + - pendulum==2.1.2 + env_vars: + counter_name: test_counter + """ + request = RayJobRequest(entrypoint="python /home/ray/samples/sample_code.py", runtime_env=yaml, num_cpu=0.5) + print(f"job request: {request.to_string()}") + 
request_json = json.dumps(request.to_dict()) + print(f"request JSON: {request_json}") + + info_json = """ + { + "entrypoint":"python /home/ray/samples/sample_code.py", + "jobId":"02000000", + "submissionId":"raysubmit_KWZLwme56esG3Wcr", + "status":"SUCCEEDED", + "message":"Job finished successfully.", + "startTime":"1699442662879", + "endTime":"1699442682405", + "runtimeEnv":{ + "env_vars":"map[counter_name:test_counter]", + "pip":"[requests==2.26.0 pendulum==2.1.2]" + } + } + """ + job_info = RayJobInfo(json.loads(info_json)) + print(job_info.to_string()) diff --git a/clients/python-apiserver-client/test/configmaps.py b/clients/python-apiserver-client/test/configmaps.py new file mode 100644 index 00000000000..6038df81e7b --- /dev/null +++ b/clients/python-apiserver-client/test/configmaps.py @@ -0,0 +1,60 @@ +from kubernetes import client, config + + +CMAP_VALUE = """ +import ray +import os +import requests + +ray.init() + +@ray.remote +class Counter: + def __init__(self): + # Used to verify runtimeEnv + self.name = os.getenv("counter_name") + assert self.name == "test_counter" + self.counter = 0 + + def inc(self): + self.counter += 1 + + def get_counter(self): + return "{} got {}".format(self.name, self.counter) + +counter = Counter.remote() + +for _ in range(5): + ray.get(counter.inc.remote()) + print(ray.get(counter.get_counter.remote())) + +# Verify that the correct runtime env was used for the job. +assert requests.__version__ == "2.26.0" +""" +CMAP_NAME = "ray-job-code-sample" + + +class ConfigmapsManager: + """ + Simple support class to manage config maps. 
Assumes local access to Kubectl + """ + + def __init__(self): + config.load_kube_config() + self.api_instance = client.CoreV1Api() + + def list_configmaps(self) -> list[str]: + cm_list = self.api_instance.list_namespaced_config_map(namespace="default").items + return [cm.metadata.name for cm in cm_list] + + def create_code_map(self) -> None: + cmap = client.V1ConfigMap() + cmap.metadata = client.V1ObjectMeta(name=CMAP_NAME) + cmap.data = {"sample_code.py": CMAP_VALUE} + self.api_instance.create_namespaced_config_map(namespace="default", body=cmap) + + def delete_code_map(self) -> None: + try: + self.api_instance.delete_namespaced_config_map(name="ray-job-code-sample", namespace="default") + except Exception as e: + print("config map ray-job-code-sample does not exist") diff --git a/clients/python-apiserver-client/test/kuberay_api_test.py b/clients/python-apiserver-client/test/kuberay_api_test.py new file mode 100644 index 00000000000..bb522d5ffb0 --- /dev/null +++ b/clients/python-apiserver-client/test/kuberay_api_test.py @@ -0,0 +1,300 @@ +# (C) Copyright IBM Corp. 2024. +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import time + +from configmaps import ConfigmapsManager +from python_apiserver_client import KubeRayAPIs +from python_apiserver_client.params import ( + DEFAULT_WORKER_START_PARAMS, + AutoscalerOptions, + Cluster, + ClusterSpec, + ConfigMapVolume, + EnvironmentVariables, + HeadNodeSpec, + RayJobRequest, + ServiceType, + Template, + Toleration, + TolerationEffect, + TolerationOperation, + UpscalingMode, + WorkerNodeSpec, +) + + +def test_templates(): + """ + Test template + """ + # create API server + apis = KubeRayAPIs() + # cleanup + _, _ = apis.delete_compute_template(ns="default", name="default-template") + # create + toleration = Toleration(key="blah1", operator=TolerationOperation.Exists, effect=TolerationEffect.NoExecute) + template = Template(name="default-template", namespace="default", cpu=2, memory=8, gpu=1, extended_resources={"vpc.amazonaws.com/efa": 32}, tolerations=[toleration]) + status, error = apis.create_compute_template(template) + assert status == 200 + assert error is None + # duplicate create should fail + status, error = apis.create_compute_template(template) + assert status != 200 + assert error is not None + print(f"\nstatus {status}, error code: {str(error)}") + # get + status, error, t = apis.get_compute_template(ns="default", name="default-template") + assert status == 200 + assert error is None + assert template.to_string() == t.to_string() + # list + status, error, template_array = apis.list_compute_templates() + assert status == 200 + assert error is None + assert template.to_string() == template_array[0].to_string() + # list ns + status, error, template_array = apis.list_compute_templates_namespace(ns="default") + assert status == 200 + assert error is None + assert template.to_string() == template_array[0].to_string() + # delete + status, error = apis.delete_compute_template(ns="default", name="default-template") + assert status == 200 + assert 
error is None + # duplicate delete should fail + status, error = apis.delete_compute_template(ns="default", name="default-template") + assert status != 200 + assert error is not None + print(f"status: {status}, err = {str(error)}") + + +def test_cluster(): + """ + Test cluster + """ + # create API server + apis = KubeRayAPIs() + # cleanup + _, _ = apis.delete_compute_template(ns="default", name="default-template") + _, _ = apis.delete_cluster(ns="default", name="test") + # Create configmap + cm_manager = ConfigmapsManager() + cm_manager.delete_code_map() + cm_manager.create_code_map() + # Create template first + template = Template(name="default-template", namespace="default", cpu=2, memory=4) + status, error = apis.create_compute_template(template) + assert status == 200 + assert error is None + # cluster + volume = ConfigMapVolume( + name="code-sample", + mount_path="/home/ray/samples", + source="ray-job-code-sample", + items={"sample_code.py": "sample_code.py"}, + ) + environment = EnvironmentVariables(key_value={"key": "value"}) + head = HeadNodeSpec( + compute_template="default-template", + ray_start_params={"metrics-export-port": "8080", "num-cpus": "0"}, + image="rayproject/ray:2.9.3-py310", + service_type=ServiceType.ClusterIP, + volumes=[volume], + environment=environment, + image_pull_policy="Always", + ) + worker = WorkerNodeSpec( + group_name="small", + compute_template="default-template", + replicas=1, + min_replicas=1, + max_replicas=1, + ray_start_params=DEFAULT_WORKER_START_PARAMS, + image="rayproject/ray:2.9.3-py310", + volumes=[volume], + environment=environment, + image_pull_policy="Always", + ) + t_cluster = Cluster( + name="test", + namespace="default", + user="boris", + version="2.9.0", + cluster_spec=ClusterSpec(head_node=head, worker_groups=[worker]), + ) + # create + status, error = apis.create_cluster(t_cluster) + assert status == 200 + assert error is None + # get + status, error, c = apis.get_cluster(ns="default", name="test") + assert 
status == 200 + assert error is None + print(f"\ngot cluster: {c.to_string()}") + # list + status, error, clusters = apis.list_clusters() + assert status == 200 + assert error is None + assert len(clusters) == 1 + print(f"got cluster: {clusters[0].to_string()}") + # list namespace + status, error, clusters = apis.list_clusters_namespace(ns="default") + assert status == 200 + assert error is None + assert len(clusters) == 1 + print(f"got cluster: {clusters[0].to_string()}") + # get cluster status + status, error, cs = apis.get_cluster_status(ns="default", name="test") + assert status == 200 + assert error is None + print(f"cluster status is {cs}") + # Wait for the cluster to get ready + status, error = apis.wait_cluster_ready(ns="default", name="test") + assert status == 200 + assert error is None + # get endpoints + status, error, endpoint = apis.get_cluster_endpoints(ns="default", name="test") + assert status == 200 + assert error is None + print(f"cluster endpoints is {endpoint}") + # delete cluster + status, error = apis.delete_cluster(ns="default", name="test") + assert status == 200 + assert error is None + # delete template + status, error = apis.delete_compute_template(ns="default", name="default-template") + assert status == 200 + assert error is None + + +def test_job_submission(): + """ + Test job submission + :return: + """ + # create API server + apis = KubeRayAPIs() + # cleanup + _, _ = apis.delete_compute_template(ns="default", name="default-template") + _, _ = apis.delete_cluster(ns="default", name="test-job") + # Create configmap + cm_manager = ConfigmapsManager() + cm_manager.delete_code_map() + cm_manager.create_code_map() + # Create template first + template = Template(name="default-template", namespace="default", cpu=2, memory=4) + status, error = apis.create_compute_template(template) + assert status == 200 + assert error is None + # cluster + volume = ConfigMapVolume( + name="code-sample", + mount_path="/home/ray/samples", + 
source="ray-job-code-sample", + items={"sample_code.py": "sample_code.py"}, + ) + environment = EnvironmentVariables(key_value={"key": "value"}) + head = HeadNodeSpec( + compute_template="default-template", + ray_start_params={"metrics-export-port": "8080", "num-cpus": "0"}, + image="rayproject/ray:2.9.3-py310", + service_type=ServiceType.ClusterIP, + volumes=[volume], + environment=environment, + image_pull_policy="IfNotPresent", + ) + worker = WorkerNodeSpec( + group_name="small", + compute_template="default-template", + replicas=0, + min_replicas=0, + max_replicas=2, + ray_start_params=DEFAULT_WORKER_START_PARAMS, + image="rayproject/ray:2.9.3-py310", + volumes=[volume], + environment=environment, + image_pull_policy="IfNotPresent", + ) + autoscaling = AutoscalerOptions(upscaling_mode=UpscalingMode.Default) + t_cluster = Cluster( + name="test-job", + namespace="default", + user="boris", + version="2.9.0", + cluster_spec=ClusterSpec(head_node=head, worker_groups=[worker], autoscaling_options=autoscaling), + ) + # create + status, error = apis.create_cluster(t_cluster) + assert status == 200 + assert error is None + # Wait for the cluster to get ready + status, error = apis.wait_cluster_ready(ns="default", name="test-job") + assert status == 200 + assert error is None + # submit Ray job + resource_yaml = """ + pip: + - requests==2.26.0 + - pendulum==2.1.2 + env_vars: + counter_name: test_counter + """ + job_request = RayJobRequest( + entrypoint="python /home/ray/samples/sample_code.py", runtime_env=resource_yaml, num_cpu=0.5 + ) + # To ensure that Ray cluster HTTP is ready try to get jobs info from the cluster + status, error, job_info_array = apis.list_job_info(ns="default", name="test-job") + assert status == 200 + assert error is None + print("\n initial jobs info") + for inf in job_info_array: + print(f" {inf.to_string()}") + time.sleep(5) + status, error, sid = apis.submit_job(ns="default", name="test-job", job_request=job_request) + assert status == 200 + 
assert error is None + time.sleep(10) + # get Ray job info + status, error, jinfo = apis.get_job_info(ns="default", name="test-job", sid=sid) + assert status == 200 + assert error is None + print(f"\njobs info {jinfo.to_string()}") + # get Ray jobs info + status, error, job_info_array = apis.list_job_info(ns="default", name="test-job") + assert status == 200 + assert error is None + print("jobs info") + for inf in job_info_array: + print(f" {inf.to_string()}") + # get Ray job log + time.sleep(5) # wait till log is available + status, error, jlog = apis.get_job_log(ns="default", name="test-job", sid=sid) + assert status == 200 + assert error is None + print(f"job log {jlog}") + # stop Ray job + status, error = apis.stop_ray_job(ns="default", name="test-job", sid=sid) + assert status == 200 + assert error is None + # delete Ray job + status, error = apis.delete_ray_job(ns="default", name="test-job", sid=sid) + assert status == 200 + assert error is None + # delete cluster + status, error = apis.delete_cluster(ns="default", name="test-job") + assert status == 200 + assert error is None + # delete template + status, error = apis.delete_compute_template(ns="default", name="default-template") + assert status == 200 + assert error is None diff --git a/clients/python-client/.gitignore b/clients/python-client/.gitignore index f2b07cb0b37..d6d73f9c8c8 100644 --- a/clients/python-client/.gitignore +++ b/clients/python-client/.gitignore @@ -32,4 +32,4 @@ htmlcov .coverage .cache nosetests.xml -coverage.xml \ No newline at end of file +coverage.xml diff --git a/clients/python-client/examples/complete-example.py b/clients/python-client/examples/complete-example.py index feb37af41f2..502898e9a13 100644 --- a/clients/python-client/examples/complete-example.py +++ b/clients/python-client/examples/complete-example.py @@ -3,8 +3,8 @@ from os import path -""" -in case you are working directly with the source, and don't wish to +""" +in case you are working directly with the source, 
and don't wish to install the module with pip install, you can directly import the packages by uncommenting the following code. """ @@ -65,7 +65,7 @@ def main(): return my_kuberay_api.create_ray_cluster(body=cluster2) # this is the api call to create the cluster2 in k8s - + # modifying the number of replicas in the workergroup cluster_to_patch, succeeded = my_Cluster_utils.update_worker_group_replicas( cluster2, group_name="workers", max_replicas=4, min_replicas=1, replicas=2 diff --git a/clients/python-client/examples/use-builder.py b/clients/python-client/examples/use-builder.py index e4c98f7c4f5..55fadf68d76 100644 --- a/clients/python-client/examples/use-builder.py +++ b/clients/python-client/examples/use-builder.py @@ -4,8 +4,8 @@ import json -""" -in case you are working directly with the source, and don't wish to +""" +in case you are working directly with the source, and don't wish to install the module with pip install, you can directly import the packages by uncommenting the following code. """ diff --git a/clients/python-client/examples/use-director.py b/clients/python-client/examples/use-director.py index ee5cef228a0..ac79f0c5aa0 100644 --- a/clients/python-client/examples/use-director.py +++ b/clients/python-client/examples/use-director.py @@ -4,8 +4,8 @@ import json import time -""" -in case you are working directly with the source, and don't wish to +""" +in case you are working directly with the source, and don't wish to install the module with pip install, you can directly import the packages by uncommenting the following code. 
""" diff --git a/clients/python-client/examples/use-utils.py b/clients/python-client/examples/use-utils.py index 202195b3b99..2b2a44c51bd 100644 --- a/clients/python-client/examples/use-utils.py +++ b/clients/python-client/examples/use-utils.py @@ -4,8 +4,8 @@ import json -""" -in case you are working directly with the source, and don't wish to +""" +in case you are working directly with the source, and don't wish to install the module with pip install, you can directly import the packages by uncommenting the following code. """ diff --git a/clients/python-client/python_client/kuberay_cluster_api.py b/clients/python-client/python_client/kuberay_cluster_api.py index 48c28378843..d7ee636fb79 100644 --- a/clients/python-client/python_client/kuberay_cluster_api.py +++ b/clients/python-client/python_client/kuberay_cluster_api.py @@ -136,7 +136,7 @@ def get_ray_cluster_status(self, name: str, k8s_namespace: str = "default", time else: log.error("error fetching custom resource: {}".format(e)) return None - + if resource["status"]: return resource["status"] else: @@ -177,7 +177,7 @@ def get_ray_cluster_status(self, name: str, k8s_namespace: str = "default", time else: log.error("error fetching custom resource: {}".format(e)) return None - + if resource["status"]: return resource["status"] else: @@ -208,13 +208,13 @@ def wait_until_ray_cluster_running(self, name: str, k8s_namespace: str = "defaul #TODO: once we add State to Status, we should check for that as well if status and status["head"] and status["head"]["serviceIP"]: return True - + log.info("raycluster {} status is not running yet, current status is {}".format(name, status["state"] if status else "unknown")) return False - + def create_ray_cluster(self, body: Any, k8s_namespace: str = "default") -> Any: """Create a new Ray cluster custom resource. 
diff --git a/clients/python-client/python_client/utils/kuberay_cluster_builder.py b/clients/python-client/python_client/utils/kuberay_cluster_builder.py index efd5af1692a..fbb62376912 100644 --- a/clients/python-client/python_client/utils/kuberay_cluster_builder.py +++ b/clients/python-client/python_client/utils/kuberay_cluster_builder.py @@ -95,7 +95,7 @@ def build_head( Parameters: - ray_image (str): Docker image for the head node. Default value is "rayproject/ray:2.9.0". - - service_type (str): Service type of the head node. Default value is "ClusterIP". + - service_type (str): Service type of the head node. Default value is "ClusterIP", which creates a headless ClusterIP service. - cpu_requests (str): CPU requests for the head node. Default value is "2". - memory_requests (str): Memory requests for the head node. Default value is "3G". - cpu_limits (str): CPU limits for the head node. Default value is "2". diff --git a/clients/python-client/python_client/utils/kuberay_cluster_utils.py b/clients/python-client/python_client/utils/kuberay_cluster_utils.py index 288e278ad76..38315cd4138 100644 --- a/clients/python-client/python_client/utils/kuberay_cluster_utils.py +++ b/clients/python-client/python_client/utils/kuberay_cluster_utils.py @@ -472,4 +472,3 @@ def is_valid_label(self, name: str) -> bool: log.error(msg) return False return True - diff --git a/clients/python-client/python_client_test/README.md b/clients/python-client/python_client_test/README.md index 09d79d52c60..b6c9a94d9f9 100644 --- a/clients/python-client/python_client_test/README.md +++ b/clients/python-client/python_client_test/README.md @@ -34,7 +34,7 @@ __install prerequisites__ `pip install coverage` __To gather data__ -`python -m coverage run -m unittest` +`python -m coverage run -m unittest` __to generate a coverage report__ `python -m coverage report` diff --git a/clients/python-client/python_client_test/test_director.py b/clients/python-client/python_client_test/test_director.py index 
40d5bee8811..96d525116ac 100644 --- a/clients/python-client/python_client_test/test_director.py +++ b/clients/python-client/python_client_test/test_director.py @@ -6,7 +6,7 @@ class TestDirector(unittest.TestCase): def __init__(self, methodName: str = ...) -> None: super().__init__(methodName) self.director = kuberay_cluster_builder.Director() - + def test_build_basic_cluster(self): cluster = self.director.build_basic_cluster(name="basic-cluster") # testing meta diff --git a/clients/python-client/setup.cfg b/clients/python-client/setup.cfg index 59245124e63..67df57dcb4e 100755 --- a/clients/python-client/setup.cfg +++ b/clients/python-client/setup.cfg @@ -20,4 +20,4 @@ exclude = examples* tools* docs* - python_client_test.tests* \ No newline at end of file + python_client_test.tests* diff --git a/config/grafana/data_grafana_dashboard.json b/config/grafana/data_grafana_dashboard.json new file mode 100644 index 00000000000..6336dab5753 --- /dev/null +++ b/config/grafana/data_grafana_dashboard.json @@ -0,0 +1,1511 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1667344411089, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Amount spilled by dataset operators. 
DataContext.enable_get_object_locations_for_metrics must be set to True to report this metric", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 1, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_spilled_bytes{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset, operator)", + "interval": "", + "legendFormat": "Bytes Spilled: {{dataset}}, {{operator}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes Spilled", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { 
+ "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Amount allocated by dataset operators.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_allocated_bytes{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset, operator)", + "interval": "", + "legendFormat": "Bytes Allocated: {{dataset}}, {{operator}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes Allocated", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Amount freed by dataset operators.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_freed_bytes{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset, operator)", + "interval": "", + "legendFormat": 
"Bytes Freed: {{dataset}}, {{operator}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes Freed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Amount of memory store used by dataset operators.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, 
+ "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_current_bytes{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset, operator)", + "interval": "", + "legendFormat": "Current Usage: {{dataset}}, {{operator}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Object Store Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Logical CPUs allocated to dataset operators.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 2 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + 
"color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_cpu_usage_cores{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset, operator)", + "interval": "", + "legendFormat": "CPU Usage: {{dataset}}, {{operator}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPUs (logical slots)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "cores", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Logical GPUs allocated to dataset operators.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + 
"linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_gpu_usage_cores{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset, operator)", + "interval": "", + "legendFormat": "GPU Usage: {{dataset}}, {{operator}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GPUs (logical slots)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "cores", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Total bytes outputted by dataset operators.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 3 + }, + "hiddenSeries": false, + "id": 7, 
+ "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_output_bytes{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset, operator)", + "interval": "", + "legendFormat": "Bytes Outputted: {{dataset}}, {{operator}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes Outputted", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "${datasource}", + "description": "Total rows outputted by dataset operators.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 3 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_output_rows{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset, operator)", + "interval": "", + "legendFormat": "Rows Outputted: {{dataset}}, {{operator}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Rows Outputted", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "rows", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + 
"$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Time spent generating blocks.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_block_generation_seconds{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset, operator)", + "interval": "", + "legendFormat": "Block Generation Time: {{dataset}}, {{operator}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Block Generation Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "seconds", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Seconds user thread is blocked by iter_batches()", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_iter_total_blocked_seconds{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset)", + "interval": 
"", + "legendFormat": "Seconds: {{dataset}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Iteration Blocked Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "seconds", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Seconds spent in user code", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 5 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + 
"stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_data_iter_user_seconds{dataset=~\"$DatasetID\",SessionName=~\"$SessionName\",}) by (dataset)", + "interval": "", + "legendFormat": "Seconds: {{dataset}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Iteration User Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "seconds", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "rayVersion:2.9.0" + ], + "templating": { + "list": [ + { + "current": { + "selected": false + }, + "description": "Filter queries of a specific Prometheus type.", + "hide": 2, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": false + }, + "datasource": "${datasource}", + "definition": "label_values(ray_data_allocated_bytes{}, SessionName)", + "description": "Filter queries to specific ray sessions.", + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "SessionName", + "options": [], + "query": { + "query": "label_values(ray_data_allocated_bytes{}, SessionName)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": 
false, + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${datasource}", + "definition": "label_values(ray_data_allocated_bytes{}, dataset)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "DatasetID", + "options": [], + "query": { + "query": "label_values(ray_data_allocated_bytes{}, dataset)", + "refId": "Prometheus-Dataset-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "rayMeta": [ + "excludesSystemRoutes", + "supportsGlobalFilterOverride" + ], + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Data Dashboard", + "uid": "rayDataDashboard", + "version": 1 +} diff --git a/config/grafana/default_grafana_dashboard.json b/config/grafana/default_grafana_dashboard.json index bd8a0330db9..96603322971 100644 --- a/config/grafana/default_grafana_dashboard.json +++ b/config/grafana/default_grafana_dashboard.json @@ -23,7 +23,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Current number of tasks in a particular state.\n\nState: the task state, as described by rpc::TaskState proto in common.proto. 
Task resubmissions due to failures or object reconstruction are shown with (retry) in the label.", "fieldConfig": { "defaults": {}, @@ -94,7 +94,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(max_over_time(ray_tasks{IsRetry=\"0\",State=~\"FINISHED|FAILED\",SessionName=\"$SessionName\",}[14d])) by (State) or clamp_min(sum(ray_tasks{IsRetry=\"0\",State!~\"FINISHED|FAILED\",SessionName=\"$SessionName\",}) by (State), 0)", + "expr": "sum(max_over_time(ray_tasks{IsRetry=\"0\",State=~\"FINISHED|FAILED\",SessionName=~\"$SessionName\",}[14d])) by (State) or clamp_min(sum(ray_tasks{IsRetry=\"0\",State!~\"FINISHED|FAILED\",SessionName=~\"$SessionName\",}) by (State), 0)", "interval": "", "legendFormat": "{{State}}", "queryType": "randomWalk", @@ -102,7 +102,7 @@ }, { "exemplar": true, - "expr": "sum(max_over_time(ray_tasks{IsRetry!=\"0\",State=~\"FINISHED|FAILED\",SessionName=\"$SessionName\",}[14d])) by (State) or clamp_min(sum(ray_tasks{IsRetry!=\"0\",State!~\"FINISHED|FAILED\",SessionName=\"$SessionName\",}) by (State), 0)", + "expr": "sum(max_over_time(ray_tasks{IsRetry!=\"0\",State=~\"FINISHED|FAILED\",SessionName=~\"$SessionName\",}[14d])) by (State) or clamp_min(sum(ray_tasks{IsRetry!=\"0\",State!~\"FINISHED|FAILED\",SessionName=~\"$SessionName\",}) by (State), 0)", "interval": "", "legendFormat": "{{State}} (retry)", "queryType": "randomWalk", @@ -157,7 +157,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Current number of (live) tasks with a particular name. 
Task resubmissions due to failures or object reconstruction are shown with (retry) in the label.", "fieldConfig": { "defaults": {}, @@ -228,7 +228,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(ray_tasks{IsRetry=\"0\",State!~\"FINISHED|FAILED\",SessionName=\"$SessionName\",}) by (Name)", + "expr": "sum(ray_tasks{IsRetry=\"0\",State!~\"FINISHED|FAILED\",SessionName=~\"$SessionName\",}) by (Name)", "interval": "", "legendFormat": "{{Name}}", "queryType": "randomWalk", @@ -236,7 +236,7 @@ }, { "exemplar": true, - "expr": "sum(ray_tasks{IsRetry!=\"0\",State!~\"FINISHED|FAILED\",SessionName=\"$SessionName\",}) by (Name)", + "expr": "sum(ray_tasks{IsRetry!=\"0\",State!~\"FINISHED|FAILED\",SessionName=~\"$SessionName\",}) by (Name)", "interval": "", "legendFormat": "{{Name}} (retry)", "queryType": "randomWalk", @@ -291,7 +291,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Current number of actors in a particular state.\n\nState: the actor state, as described by rpc::ActorTableData proto in gcs.proto.", "fieldConfig": { "defaults": {}, @@ -362,7 +362,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(ray_actors{SessionName=\"$SessionName\",}) by (State)", + "expr": "sum(ray_actors{SessionName=~\"$SessionName\",}) by (State)", "interval": "", "legendFormat": "{{State}}", "queryType": "randomWalk", @@ -417,7 +417,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Current number of (live) actors with a particular name.", "fieldConfig": { "defaults": {}, @@ -488,7 +488,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(ray_actors{State!=\"DEAD\",SessionName=\"$SessionName\",}) by (Name)", + "expr": "sum(ray_actors{State!=\"DEAD\",SessionName=~\"$SessionName\",}) by (Name)", "interval": "", "legendFormat": "{{Name}}", "queryType": "randomWalk", @@ -543,7 +543,7 @@ "bars": false, "dashLength": 10, 
"dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Logical CPU usage of Ray. The dotted line indicates the total number of CPUs. The logical CPU is allocated by `num_cpus` arguments from tasks and actors. PENDING means the number of CPUs that will be available when new nodes are up after the autoscaler scales up.\n\nNOTE: Ray's logical CPU is different from physical CPU usage. Ray's logical CPU is allocated by `num_cpus` arguments.", "fieldConfig": { "defaults": {}, @@ -614,7 +614,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(ray_resources{Name=\"CPU\",State=\"USED\",SessionName=\"$SessionName\",}) by (instance)", + "expr": "sum(ray_resources{Name=\"CPU\",State=\"USED\",SessionName=~\"$SessionName\",}) by (instance)", "interval": "", "legendFormat": "CPU Usage: {{instance}}", "queryType": "randomWalk", @@ -622,7 +622,7 @@ }, { "exemplar": true, - "expr": "sum(ray_resources{Name=\"CPU\",SessionName=\"$SessionName\",})", + "expr": "sum(ray_resources{Name=\"CPU\",SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -630,7 +630,7 @@ }, { "exemplar": true, - "expr": "((sum(autoscaler_cluster_resources{resource=\"CPU\",SessionName=\"$SessionName\",}) or vector(0)) + (sum(autoscaler_pending_resources{resource=\"CPU\",SessionName=\"$SessionName\",}) or vector(0)) and (sum(autoscaler_cluster_resources{resource=\"CPU\",SessionName=\"$SessionName\",}) or vector(0)) + (sum(autoscaler_pending_resources{resource=\"CPU\",SessionName=\"$SessionName\",}) or vector(0)) > (sum(autoscaler_cluster_resources{resource=\"CPU\",SessionName=\"$SessionName\",}) or vector(0)))", + "expr": "((sum(autoscaler_cluster_resources{resource=\"CPU\",SessionName=~\"$SessionName\",}) or vector(0)) + (sum(autoscaler_pending_resources{resource=\"CPU\",SessionName=~\"$SessionName\",}) or vector(0)) and (sum(autoscaler_cluster_resources{resource=\"CPU\",SessionName=~\"$SessionName\",}) or vector(0)) + 
(sum(autoscaler_pending_resources{resource=\"CPU\",SessionName=~\"$SessionName\",}) or vector(0)) > (sum(autoscaler_cluster_resources{resource=\"CPU\",SessionName=~\"$SessionName\",}) or vector(0)))", "interval": "", "legendFormat": "MAX + PENDING", "queryType": "randomWalk", @@ -685,7 +685,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Object store memory usage by location. The dotted line indicates the object store memory capacity.\n\nLocation: where the memory was allocated, which is MMAP_SHM or MMAP_DISK to indicate memory-mapped page, SPILLED to indicate spillage to disk, and WORKER_HEAP for objects small enough to be inlined in worker memory. Refer to metric_defs.cc for more information.", "fieldConfig": { "defaults": {}, @@ -756,7 +756,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(ray_object_store_memory{SessionName=\"$SessionName\",}) by (Location)", + "expr": "sum(ray_object_store_memory{SessionName=~\"$SessionName\",}) by (Location)", "interval": "", "legendFormat": "{{Location}}", "queryType": "randomWalk", @@ -764,7 +764,7 @@ }, { "exemplar": true, - "expr": "sum(ray_resources{Name=\"object_store_memory\",SessionName=\"$SessionName\",})", + "expr": "sum(ray_resources{Name=\"object_store_memory\",SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -819,7 +819,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Logical GPU usage of Ray. The dotted line indicates the total number of GPUs. The logical GPU is allocated by `num_gpus` arguments from tasks and actors. 
PENDING means the number of GPUs that will be available when new nodes are up after the autoscaler scales up.", "fieldConfig": { "defaults": {}, @@ -890,7 +890,7 @@ "targets": [ { "exemplar": true, - "expr": "ray_resources{Name=\"GPU\",State=\"USED\",SessionName=\"$SessionName\",}", + "expr": "ray_resources{Name=\"GPU\",State=\"USED\",SessionName=~\"$SessionName\",}", "interval": "", "legendFormat": "GPU Usage: {{instance}}", "queryType": "randomWalk", @@ -898,7 +898,7 @@ }, { "exemplar": true, - "expr": "sum(ray_resources{Name=\"GPU\",SessionName=\"$SessionName\",})", + "expr": "sum(ray_resources{Name=\"GPU\",SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -906,7 +906,7 @@ }, { "exemplar": true, - "expr": "((sum(autoscaler_cluster_resources{resource=\"GPU\",SessionName=\"$SessionName\",}) or vector(0)) + (sum(autoscaler_pending_resources{resource=\"GPU\",SessionName=\"$SessionName\",}) or vector(0)) and (sum(autoscaler_cluster_resources{resource=\"GPU\",SessionName=\"$SessionName\",}) or vector(0)) + (sum(autoscaler_pending_resources{resource=\"GPU\",SessionName=\"$SessionName\",}) or vector(0)) > (sum(autoscaler_cluster_resources{resource=\"GPU\",SessionName=\"$SessionName\",}) or vector(0)))", + "expr": "((sum(autoscaler_cluster_resources{resource=\"GPU\",SessionName=~\"$SessionName\",}) or vector(0)) + (sum(autoscaler_pending_resources{resource=\"GPU\",SessionName=~\"$SessionName\",}) or vector(0)) and (sum(autoscaler_cluster_resources{resource=\"GPU\",SessionName=~\"$SessionName\",}) or vector(0)) + (sum(autoscaler_pending_resources{resource=\"GPU\",SessionName=~\"$SessionName\",}) or vector(0)) > (sum(autoscaler_cluster_resources{resource=\"GPU\",SessionName=~\"$SessionName\",}) or vector(0)))", "interval": "", "legendFormat": "MAX + PENDING", "queryType": "randomWalk", @@ -961,7 +961,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", 
"description": "Current number of placement groups in a particular state.\n\nState: the placement group state, as described by the rpc::PlacementGroupTable proto in gcs.proto.", "fieldConfig": { "defaults": {}, @@ -1032,7 +1032,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(ray_placement_groups{SessionName=\"$SessionName\",}) by (State)", + "expr": "sum(ray_placement_groups{SessionName=~\"$SessionName\",}) by (State)", "interval": "", "legendFormat": "{{State}}", "queryType": "randomWalk", @@ -1087,7 +1087,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "", "fieldConfig": { "defaults": {}, @@ -1158,7 +1158,7 @@ "targets": [ { "exemplar": true, - "expr": "ray_node_cpu_utilization{instance=~\"$Instance\",SessionName=\"$SessionName\",} * ray_node_cpu_count{instance=~\"$Instance\",SessionName=\"$SessionName\",} / 100", + "expr": "ray_node_cpu_utilization{instance=~\"$Instance\",SessionName=~\"$SessionName\",} * ray_node_cpu_count{instance=~\"$Instance\",SessionName=~\"$SessionName\",} / 100", "interval": "", "legendFormat": "CPU Usage: {{instance}}", "queryType": "randomWalk", @@ -1166,7 +1166,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_cpu_count{SessionName=\"$SessionName\",})", + "expr": "sum(ray_node_cpu_count{SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -1221,7 +1221,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Node's physical (hardware) GPU usage. The dotted line means the total number of hardware GPUs from the cluster. 
", "fieldConfig": { "defaults": {}, @@ -1292,7 +1292,7 @@ "targets": [ { "exemplar": true, - "expr": "ray_node_gpus_utilization{instance=~\"$Instance\",SessionName=\"$SessionName\",} / 100", + "expr": "ray_node_gpus_utilization{instance=~\"$Instance\",SessionName=~\"$SessionName\",} / 100", "interval": "", "legendFormat": "GPU Usage: {{instance}}, gpu.{{GpuIndex}}, {{GpuDeviceName}}", "queryType": "randomWalk", @@ -1300,7 +1300,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_gpus_available{SessionName=\"$SessionName\",})", + "expr": "sum(ray_node_gpus_available{SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -1355,7 +1355,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Node's physical (hardware) disk usage. The dotted line means the total amount of disk space from the cluster.\n\nNOTE: When Ray is deployed within a container, this shows the disk usage from the host machine. 
", "fieldConfig": { "defaults": {}, @@ -1426,7 +1426,7 @@ "targets": [ { "exemplar": true, - "expr": "ray_node_disk_usage{instance=~\"$Instance\",SessionName=\"$SessionName\",}", + "expr": "ray_node_disk_usage{instance=~\"$Instance\",SessionName=~\"$SessionName\",}", "interval": "", "legendFormat": "Disk Used: {{instance}}", "queryType": "randomWalk", @@ -1434,7 +1434,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_disk_free{SessionName=\"$SessionName\",}) + sum(ray_node_disk_usage{SessionName=\"$SessionName\",})", + "expr": "sum(ray_node_disk_free{SessionName=~\"$SessionName\",}) + sum(ray_node_disk_usage{SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -1489,7 +1489,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Disk IO per node.", "fieldConfig": { "defaults": {}, @@ -1560,7 +1560,7 @@ "targets": [ { "exemplar": true, - "expr": "ray_node_disk_io_write_speed{instance=~\"$Instance\",SessionName=\"$SessionName\",}", + "expr": "ray_node_disk_io_write_speed{instance=~\"$Instance\",SessionName=~\"$SessionName\",}", "interval": "", "legendFormat": "Write: {{instance}}", "queryType": "randomWalk", @@ -1568,7 +1568,7 @@ }, { "exemplar": true, - "expr": "ray_node_disk_io_read_speed{instance=~\"$Instance\",SessionName=\"$SessionName\",}", + "expr": "ray_node_disk_io_read_speed{instance=~\"$Instance\",SessionName=~\"$SessionName\",}", "interval": "", "legendFormat": "Read: {{instance}}", "queryType": "randomWalk", @@ -1623,7 +1623,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "The physical (hardware) memory usage for each node. The dotted line means the total amount of memory from the cluster. 
Node memory is a sum of object store memory (shared memory) and heap memory.\n\nNote: If Ray is deployed within a container, the total memory could be lower than the host machine because Ray may reserve some additional memory space outside the container.", "fieldConfig": { "defaults": {}, @@ -1694,7 +1694,7 @@ "targets": [ { "exemplar": true, - "expr": "ray_node_mem_used{instance=~\"$Instance\",SessionName=\"$SessionName\",}", + "expr": "ray_node_mem_used{instance=~\"$Instance\",SessionName=~\"$SessionName\",}", "interval": "", "legendFormat": "Memory Used: {{instance}}", "queryType": "randomWalk", @@ -1702,7 +1702,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_mem_total{SessionName=\"$SessionName\",})", + "expr": "sum(ray_node_mem_total{SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -1757,7 +1757,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "The number of tasks and actors killed by the Ray Out of Memory killer due to high memory pressure. Metrics are broken down by IP and the name. https://docs.ray.io/en/master/ray-core/scheduling/ray-oom-prevention.html.", "fieldConfig": { "defaults": {}, @@ -1828,7 +1828,7 @@ "targets": [ { "exemplar": true, - "expr": "ray_memory_manager_worker_eviction_total{instance=~\"$Instance\",SessionName=\"$SessionName\",}", + "expr": "ray_memory_manager_worker_eviction_total{instance=~\"$Instance\",SessionName=~\"$SessionName\",}", "interval": "", "legendFormat": "OOM Killed: {{Name}}, {{instance}}", "queryType": "randomWalk", @@ -1883,7 +1883,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "The physical (hardware) memory usage across the cluster, broken down by component. This reports the summed RSS-SHM per Ray component, which corresponds to an approximate memory usage per proc. 
Ray components consist of system components (e.g., raylet, gcs, dashboard, or agent) and the process (that contains method names) names of running tasks/actors.", "fieldConfig": { "defaults": {}, @@ -1954,7 +1954,7 @@ "targets": [ { "exemplar": true, - "expr": "(sum(ray_component_rss_mb{SessionName=\"$SessionName\",} * 1e6) by (Component)) - (sum(ray_component_mem_shared_bytes{SessionName=\"$SessionName\",}) by (Component))", + "expr": "(sum(ray_component_rss_mb{SessionName=~\"$SessionName\",} * 1e6) by (Component)) - (sum(ray_component_mem_shared_bytes{SessionName=~\"$SessionName\",}) by (Component))", "interval": "", "legendFormat": "{{Component}}", "queryType": "randomWalk", @@ -1962,7 +1962,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_mem_shared_bytes{SessionName=\"$SessionName\",})", + "expr": "sum(ray_node_mem_shared_bytes{SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "shared_memory", "queryType": "randomWalk", @@ -1970,7 +1970,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_mem_total{SessionName=\"$SessionName\",})", + "expr": "sum(ray_node_mem_total{SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -2025,7 +2025,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "The physical (hardware) CPU usage across the cluster, broken down by component. This reports the summed CPU usage per Ray component. 
Ray components consist of system components (e.g., raylet, gcs, dashboard, or agent) and the process (that contains method names) names of running tasks/actors.", "fieldConfig": { "defaults": {}, @@ -2096,7 +2096,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(ray_component_cpu_percentage{SessionName=\"$SessionName\",}) by (Component) / 100", + "expr": "sum(ray_component_cpu_percentage{SessionName=~\"$SessionName\",}) by (Component) / 100", "interval": "", "legendFormat": "{{Component}}", "queryType": "randomWalk", @@ -2104,7 +2104,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_cpu_count{SessionName=\"$SessionName\",})", + "expr": "sum(ray_node_cpu_count{SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -2159,7 +2159,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "The physical (hardware) GPU memory usage for each node. The dotted line means the total amount of GPU memory from the cluster.", "fieldConfig": { "defaults": {}, @@ -2230,7 +2230,7 @@ "targets": [ { "exemplar": true, - "expr": "ray_node_gram_used{instance=~\"$Instance\",SessionName=\"$SessionName\",} * 1024 * 1024", + "expr": "ray_node_gram_used{instance=~\"$Instance\",SessionName=~\"$SessionName\",} * 1024 * 1024", "interval": "", "legendFormat": "Used GRAM: {{instance}}, gpu.{{GpuIndex}}, {{GpuDeviceName}}", "queryType": "randomWalk", @@ -2238,7 +2238,7 @@ }, { "exemplar": true, - "expr": "(sum(ray_node_gram_available{SessionName=\"$SessionName\",}) + sum(ray_node_gram_used{SessionName=\"$SessionName\",})) * 1024 * 1024", + "expr": "(sum(ray_node_gram_available{SessionName=~\"$SessionName\",}) + sum(ray_node_gram_used{SessionName=~\"$SessionName\",})) * 1024 * 1024", "interval": "", "legendFormat": "MAX", "queryType": "randomWalk", @@ -2293,7 +2293,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": 
"${datasource}", "description": "Network speed per node", "fieldConfig": { "defaults": {}, @@ -2364,7 +2364,7 @@ "targets": [ { "exemplar": true, - "expr": "ray_node_network_receive_speed{instance=~\"$Instance\",SessionName=\"$SessionName\",}", + "expr": "ray_node_network_receive_speed{instance=~\"$Instance\",SessionName=~\"$SessionName\",}", "interval": "", "legendFormat": "Recv: {{instance}}", "queryType": "randomWalk", @@ -2372,7 +2372,7 @@ }, { "exemplar": true, - "expr": "ray_node_network_send_speed{instance=~\"$Instance\",SessionName=\"$SessionName\",}", + "expr": "ray_node_network_send_speed{instance=~\"$Instance\",SessionName=~\"$SessionName\",}", "interval": "", "legendFormat": "Send: {{instance}}", "queryType": "randomWalk", @@ -2427,7 +2427,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "A total number of active failed, and pending nodes from the cluster. \n\nACTIVE: A node is alive and available.\n\nFAILED: A node is dead and not available. The node is considered dead when the raylet process on the node is terminated. The node will get into the failed state if it cannot be provided (e.g., there's no available node from the cloud provider) or failed to setup (e.g., setup_commands have errors). \n\nPending: A node is being started by the Ray cluster launcher. 
The node is unavailable now because it is being provisioned and initialized.", "fieldConfig": { "defaults": {}, @@ -2498,7 +2498,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(autoscaler_active_nodes{SessionName=\"$SessionName\",}) by (NodeType)", + "expr": "sum(autoscaler_active_nodes{SessionName=~\"$SessionName\",}) by (NodeType)", "interval": "", "legendFormat": "Active Nodes: {{NodeType}}", "queryType": "randomWalk", @@ -2506,7 +2506,7 @@ }, { "exemplar": true, - "expr": "sum(autoscaler_recently_failed_nodes{SessionName=\"$SessionName\",}) by (NodeType)", + "expr": "sum(autoscaler_recently_failed_nodes{SessionName=~\"$SessionName\",}) by (NodeType)", "interval": "", "legendFormat": "Failed Nodes: {{NodeType}}", "queryType": "randomWalk", @@ -2514,7 +2514,7 @@ }, { "exemplar": true, - "expr": "sum(autoscaler_pending_nodes{SessionName=\"$SessionName\",}) by (NodeType)", + "expr": "sum(autoscaler_pending_nodes{SessionName=~\"$SessionName\",}) by (NodeType)", "interval": "", "legendFormat": "Pending Nodes: {{NodeType}}", "queryType": "randomWalk", @@ -2569,7 +2569,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "Prometheus", + "datasource": "${datasource}", "description": "Aggregated utilization of all physical resources (CPU, GPU, memory, disk, or etc.) 
across the cluster.", "fieldConfig": { "defaults": {}, @@ -2640,7 +2640,7 @@ "targets": [ { "exemplar": true, - "expr": "avg(ray_node_cpu_utilization{SessionName=\"$SessionName\",})", + "expr": "avg(ray_node_cpu_utilization{SessionName=~\"$SessionName\",})", "interval": "", "legendFormat": "CPU (physical)", "queryType": "randomWalk", @@ -2648,7 +2648,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_gpus_utilization{SessionName=\"$SessionName\",}) / on() (sum(autoscaler_cluster_resources{resource='GPU',SessionName=\"$SessionName\",}) or vector(0))", + "expr": "sum(ray_node_gpus_utilization{SessionName=~\"$SessionName\",}) / on() (sum(autoscaler_cluster_resources{resource='GPU',SessionName=~\"$SessionName\",}) or vector(0))", "interval": "", "legendFormat": "GPU (physical)", "queryType": "randomWalk", @@ -2656,7 +2656,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_mem_used{SessionName=\"$SessionName\",}) / on() (sum(ray_node_mem_total{SessionName=\"$SessionName\",})) * 100", + "expr": "sum(ray_node_mem_used{SessionName=~\"$SessionName\",}) / on() (sum(ray_node_mem_total{SessionName=~\"$SessionName\",})) * 100", "interval": "", "legendFormat": "Memory (RAM)", "queryType": "randomWalk", @@ -2664,7 +2664,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_gram_used{SessionName=\"$SessionName\",}) / on() (sum(ray_node_gram_available{SessionName=\"$SessionName\",}) + sum(ray_node_gram_used{SessionName=\"$SessionName\",})) * 100", + "expr": "sum(ray_node_gram_used{SessionName=~\"$SessionName\",}) / on() (sum(ray_node_gram_available{SessionName=~\"$SessionName\",}) + sum(ray_node_gram_used{SessionName=~\"$SessionName\",})) * 100", "interval": "", "legendFormat": "GRAM", "queryType": "randomWalk", @@ -2672,7 +2672,7 @@ }, { "exemplar": true, - "expr": "sum(ray_object_store_memory{SessionName=\"$SessionName\",}) / on() sum(ray_resources{Name=\"object_store_memory\",SessionName=\"$SessionName\",}) * 100", + "expr": 
"sum(ray_object_store_memory{SessionName=~\"$SessionName\",}) / on() sum(ray_resources{Name=\"object_store_memory\",SessionName=~\"$SessionName\",}) * 100", "interval": "", "legendFormat": "Object Store Memory", "queryType": "randomWalk", @@ -2680,7 +2680,7 @@ }, { "exemplar": true, - "expr": "sum(ray_node_disk_usage{SessionName=\"$SessionName\",}) / on() (sum(ray_node_disk_free{SessionName=\"$SessionName\",}) + sum(ray_node_disk_usage{SessionName=\"$SessionName\",})) * 100", + "expr": "sum(ray_node_disk_usage{SessionName=~\"$SessionName\",}) / on() (sum(ray_node_disk_free{SessionName=~\"$SessionName\",}) + sum(ray_node_disk_usage{SessionName=~\"$SessionName\",})) * 100", "interval": "", "legendFormat": "Disk", "queryType": "randomWalk", @@ -2735,21 +2735,37 @@ "schemaVersion": 27, "style": "dark", "tags": [ - "rayVersion:2.5.0" + "rayVersion:2.9.0" ], "templating": { "list": [ { - "allValue": null, "current": { "selected": false }, - "datasource": "Prometheus", + "description": "Filter queries of a specific Prometheus type.", + "hide": 2, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": false + }, + "datasource": "${datasource}", "definition": "label_values(ray_node_network_receive_speed{}, SessionName)", "description": "Filter queries to specific ray sessions.", "error": null, "hide": 0, - "includeAll": false, + "includeAll": true, "label": null, "multi": false, "name": "SessionName", @@ -2779,7 +2795,7 @@ "$__all" ] }, - "datasource": "Prometheus", + "datasource": "${datasource}", "definition": "label_values(ray_node_network_receive_speed{SessionName=\"$SessionName\",}, instance)", "description": null, "error": null, @@ -2817,4 +2833,4 @@ "rayMeta": [ "supportsGlobalFilterOverride" ] -} \ No newline at end of file +} diff --git 
a/config/grafana/serve_deployment_grafana_dashboard.json b/config/grafana/serve_deployment_grafana_dashboard.json new file mode 100644 index 00000000000..ea80d2a6bd6 --- /dev/null +++ b/config/grafana/serve_deployment_grafana_dashboard.json @@ -0,0 +1,2241 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1667344411089, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Number of replicas per deployment. Ignores \"Route\" variable.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 0, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 1, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + 
"expr": "sum(ray_serve_deployment_replica_healthy{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}) by (application, deployment)", + "interval": "", + "legendFormat": "{{application, deployment}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Replicas per deployment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "replicas", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "QPS for each replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 0, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": 
"/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(ray_serve_deployment_request_counter{route=~\"$Route\",route!~\"/-/.*\",application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (application, deployment, replica)", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "QPS per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "qps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Error QPS for each replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 0, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + 
"alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(ray_serve_deployment_error_counter{route=~\"$Route\",route!~\"/-/.*\",application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (application, deployment, replica)", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Error QPS per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "qps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P50 latency per replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 1, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 4, + "legend": 
{ + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.5, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{route=~\"$Route\",route!~\"/-/.*\",application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (application, deployment, replica, le))", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.5, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{route=~\"$Route\",route!~\"/-/.*\",application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (le))", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P50 latency per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, 
+ "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P90 latency per replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 1, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.9, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{route=~\"$Route\",route!~\"/-/.*\",application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (application, deployment, replica, le))", + "interval": "", + 
"legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.9, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{route=~\"$Route\",route!~\"/-/.*\",application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (le))", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P90 latency per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P99 latency per replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 1, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": 
"MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{route=~\"$Route\",route!~\"/-/.*\",application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (application, deployment, replica, le))", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{route=~\"$Route\",application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (le))", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P99 latency per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Number of requests queued per deployment. 
Ignores \"Replica\" and \"Route\" variable.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 2, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_serve_deployment_queued_queries{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}) by (application, deployment)", + "interval": "", + "legendFormat": "{{application, deployment}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue size per deployment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "requests", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + 
"$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Pending requests for each replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 2, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_serve_replica_pending_queries{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}) by (application, deployment, replica)", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pending requests per replica", + "tooltip": { + "shared": true, + "sort": 0, 
+ "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "requests", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Current running requests for each replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 2, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": 
"sum(ray_serve_replica_processing_queries{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}) by (application, deployment, replica)", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Running requests per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "requests", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "The number of multiplexed models for each replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 3, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + 
"alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_serve_num_multiplexed_models{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}) by (application, deployment, replica)", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Multiplexed models per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "models", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "The number of times of multiplexed models loaded for each replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 3, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", 
+ "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_serve_multiplexed_models_load_counter{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}) by (application, deployment, replica)", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Multiplexed model loads per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "times", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "The number of times of multiplexed models unloaded for each replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 3, + "w": 8, + "h": 8 + }, + 
"hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_serve_multiplexed_models_unload_counter{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}) by (application, deployment, replica)", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Multiplexed model unloads per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "times", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + 
}, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P99 latency of multiplexed model load per replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 4, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(ray_serve_multiplexed_model_load_latency_ms_bucket{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (application, deployment, replica, le))", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P99 latency of multiplexed model loads per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode":
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P99 latency of multiplexed model unload per replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 4, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(ray_serve_multiplexed_model_unload_latency_ms_bucket{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])) by (application, deployment,
replica, le))", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P99 latency of multiplexed model unloads per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "The ids of multiplexed models for each replica.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 4, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + 
PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "ray_serve_registered_multiplexed_model_id{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}", + "interval": "", + "legendFormat": "{{replica}}:{{model_id}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Multiplexed model ids per replica", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "model", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "The cache hit rate of multiplexed models for the deployment.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 5, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "(1 - sum(rate(ray_serve_multiplexed_models_load_counter{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m]))/sum(rate(ray_serve_multiplexed_get_model_requests_counter{application=~\"$Application\",deployment=~\"$Deployment\",replica=~\"$Replica\",}[5m])))", + "interval": "", + "legendFormat": "{{replica}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Multiplexed model cache hit rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "%", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "rayVersion:2.9.0" + ], + "templating": { + "list": [ + { + "current": { + "selected": false + }, + "description": "Filter queries to specific prometheus type.", + "hide": 2, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + 
"skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${datasource}", + "definition": "label_values(ray_serve_deployment_replica_healthy{}, application)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Application", + "options": [], + "query": { + "query": "label_values(ray_serve_deployment_replica_healthy{}, application)", + "refId": "Prometheus-Instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${datasource}", + "definition": "label_values(ray_serve_deployment_replica_healthy{application=~\"$Application\",}, deployment)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Deployment", + "options": [], + "query": { + "query": "label_values(ray_serve_deployment_replica_healthy{application=~\"$Application\",}, deployment)", + "refId": "Prometheus-Instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${datasource}", + "definition": "label_values(ray_serve_deployment_replica_healthy{application=~\"$Application\",deployment=~\"$Deployment\",}, replica)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Replica", + "options": [], + "query": { + "query": 
"label_values(ray_serve_deployment_replica_healthy{application=~\"$Application\",deployment=~\"$Deployment\",}, replica)", + "refId": "Prometheus-Instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${datasource}", + "definition": "label_values(ray_serve_deployment_request_counter{deployment=~\"$Deployment\",}, route)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Route", + "options": [], + "query": { + "query": "label_values(ray_serve_deployment_request_counter{deployment=~\"$Deployment\",}, route)", + "refId": "Prometheus-Instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "rayMeta": [ + "excludesSystemRoutes", + "supportsGlobalFilterOverride" + ], + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Serve Deployment Dashboard", + "uid": "rayServeDeploymentDashboard", + "version": 1 +} diff --git a/config/grafana/serve_grafana_dashboard.json b/config/grafana/serve_grafana_dashboard.json new file mode 100644 index 00000000000..dec53437cf9 --- /dev/null +++ b/config/grafana/serve_grafana_dashboard.json @@ -0,0 +1,2208 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1667344411089, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Aggregated utilization of all physical resources (CPU, GPU, memory, disk, etc.) across the cluster. Ignores application variable.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 0, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "avg(ray_node_cpu_utilization{})", + "interval": "", + "legendFormat": "CPU (physical)", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(ray_node_gpus_utilization{}) / on() (sum(autoscaler_cluster_resources{resource='GPU',}) or vector(0))", + "interval": "", + "legendFormat": "GPU (physical)", + "queryType": "randomWalk", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(ray_node_mem_used{}) / on() (sum(ray_node_mem_total{})) * 100", + "interval": "", + "legendFormat": "Memory (RAM)", + "queryType": "randomWalk", + "refId":
"C" + }, + { + "exemplar": true, + "expr": "sum(ray_node_gram_used{}) / on() (sum(ray_node_gram_available{}) + sum(ray_node_gram_used{})) * 100", + "interval": "", + "legendFormat": "GRAM", + "queryType": "randomWalk", + "refId": "D" + }, + { + "exemplar": true, + "expr": "sum(ray_object_store_memory{}) / on() sum(ray_resources{Name=\"object_store_memory\",}) * 100", + "interval": "", + "legendFormat": "Object Store Memory", + "queryType": "randomWalk", + "refId": "E" + }, + { + "exemplar": true, + "expr": "sum(ray_node_disk_usage{}) / on() (sum(ray_node_disk_free{}) + sum(ray_node_disk_usage{})) * 100", + "interval": "", + "legendFormat": "Disk", + "queryType": "randomWalk", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cluster Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "%", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "QPS for each selected application.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 0, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + 
"lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(ray_serve_num_http_requests{application=~\"$Application\",application!~\"\",route=~\"$HTTP_Route\",route!~\"/-/.*\",}[5m])) by (application, route)", + "interval": "", + "legendFormat": "{{application, route}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(ray_serve_num_grpc_requests{application=~\"$Application\",application!~\"\",method=~\"$gRPC_Method\",}[5m])) by (application, method)", + "interval": "", + "legendFormat": "{{application, method}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "QPS per application", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "qps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + 
"bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Error QPS for each selected application.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 0, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(ray_serve_num_http_error_requests{application=~\"$Application\",application!~\"\",route=~\"$HTTP_Route\",route!~\"/-/.*\",}[5m])) by (application, route)", + "interval": "", + "legendFormat": "{{application, route}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(ray_serve_num_grpc_error_requests{application=~\"$Application\",application!~\"\",method=~\"$gRPC_Method\",}[5m])) by (application, method)", + "interval": "", + "legendFormat": "{{application, method}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + 
"timeShift": null, + "title": "Error QPS per application", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "qps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P50 latency for selected applications.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 1, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": 
"histogram_quantile(0.5, sum(rate(ray_serve_http_request_latency_ms_bucket{application=~\"$Application\",application!~\"\",route=~\"$HTTP_Route\",route!~\"/-/.*\",}[5m])) by (application, route, le))", + "interval": "", + "legendFormat": "{{application, route}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.5, sum(rate(ray_serve_grpc_request_latency_ms_bucket{application=~\"$Application\",application!~\"\",method=~\"$gRPC_Method\",}[5m])) by (application, method, le))", + "interval": "", + "legendFormat": "{{application, method}}", + "queryType": "randomWalk", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.5, sum(rate({__name__=~ \"ray_serve_(http|grpc)_request_latency_ms_bucket\",application=~\"$Application\",application!~\"\",}[5m])) by (le))", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P50 latency per application", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P90 latency for selected applications.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 1, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + 
"alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.9, sum(rate(ray_serve_http_request_latency_ms_bucket{application=~\"$Application\",application!~\"\",route=~\"$HTTP_Route\",route!~\"/-/.*\",}[5m])) by (application, route, le))", + "interval": "", + "legendFormat": "{{application, route}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.9, sum(rate(ray_serve_grpc_request_latency_ms_bucket{application=~\"$Application\",application!~\"\",method=~\"$gRPC_Method\",}[5m])) by (application, method, le))", + "interval": "", + "legendFormat": "{{application, method}}", + "queryType": "randomWalk", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.9, sum(rate({__name__=~ \"ray_serve_(http|grpc)_request_latency_ms_bucket|ray_serve_grpc_request_latency_ms_bucket\",application=~\"$Application\",application!~\"\",}[5m])) by (le))", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "C" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P90 latency per application", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P99 latency for selected applications.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 1, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + 
"exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(ray_serve_http_request_latency_ms_bucket{application=~\"$Application\",application!~\"\",route=~\"$HTTP_Route\",route!~\"/-/.*\",}[5m])) by (application, route, le))", + "interval": "", + "legendFormat": "{{application, route}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(ray_serve_grpc_request_latency_ms_bucket{application=~\"$Application\",application!~\"\",method=~\"$gRPC_Method\",}[5m])) by (application, method, le))", + "interval": "", + "legendFormat": "{{application, method}}", + "queryType": "randomWalk", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate({__name__=~ \"ray_serve_(http|grpc)_request_latency_ms_bucket|ray_serve_grpc_request_latency_ms_bucket\",application=~\"$Application\",application!~\"\",}[5m])) by (le))", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P99 latency per application", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Number of replicas per deployment. 
Ignores \"Application\" variable.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 2, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_serve_deployment_replica_healthy{}) by (application, deployment)", + "interval": "", + "legendFormat": "{{application, deployment}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Replicas per deployment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "replicas", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": 
null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "QPS for each deployment.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 2, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(ray_serve_deployment_request_counter{application=~\"$Application\",application!~\"\",}[5m])) by (application, deployment)", + "interval": "", + "legendFormat": "{{application, deployment}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "QPS per deployment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + 
"show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "qps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Error QPS for each deployment.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 2, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(ray_serve_deployment_error_counter{application=~\"$Application\",application!~\"\",}[5m])) by (application, deployment)", + "interval": "", + "legendFormat": "{{application, deployment}}", + "queryType": "randomWalk", + "refId": "A" + } + ], +
"thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Error QPS per deployment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "qps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P50 latency per deployment.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 3, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + 
"exemplar": true, + "expr": "histogram_quantile(0.5, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{application=~\"$Application\",application!~\"\",}[5m])) by (application, deployment, le))", + "interval": "", + "legendFormat": "{{application, deployment}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.5, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{application=~\"$Application\",application!~\"\",}[5m])) by (le))", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P50 latency per deployment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "P90 latency per deployment.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 3, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + 
"percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.9, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{application=~\"$Application\",application!~\"\",}[5m])) by (application, deployment, le))", + "interval": "", + "legendFormat": "{{application, deployment}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.9, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{application=~\"$Application\",application!~\"\",}[5m])) by (le))", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P90 latency per deployment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + 
"description": "P99 latency per deployment.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 3, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{application=~\"$Application\",application!~\"\",}[5m])) by (application, deployment, le))", + "interval": "", + "legendFormat": "{{application, deployment}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(ray_serve_deployment_processing_latency_ms_bucket{application=~\"$Application\",application!~\"\",}[5m])) by (le))", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P99 latency per deployment", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Number of requests queued per deployment. Ignores \"Application\" variable.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "x": 0, + "y": 4, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_serve_deployment_queued_queries{}) by (application, deployment)", + "interval": "", 
+ "legendFormat": "{{application, deployment}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue size per deployment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "requests", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Number of nodes in this cluster. Ignores \"Application\" variable.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "x": 8, + "y": 4, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": 
true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(autoscaler_active_nodes{}) by (NodeType)", + "interval": "", + "legendFormat": "Active Nodes: {{NodeType}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(autoscaler_recently_failed_nodes{}) by (NodeType)", + "interval": "", + "legendFormat": "Failed Nodes: {{NodeType}}", + "queryType": "randomWalk", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(autoscaler_pending_nodes{}) by (NodeType)", + "interval": "", + "legendFormat": "Pending Nodes: {{NodeType}}", + "queryType": "randomWalk", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Node count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "nodes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "description": "Network speed per node. 
Ignores \"Application\" variable.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "x": 16, + "y": 4, + "w": 8, + "h": 8 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2987", + "alias": "MAX", + "dashes": true, + "color": "#1F60C4", + "fill": 0, + "stack": false + }, + { + "$$hashKey": "object:78", + "alias": "/FINISHED|FAILED|DEAD|REMOVED|Failed Nodes:/", + "hiddenSeries": true + }, + { + "$$hashKey": "object:2987", + "alias": "MAX + PENDING", + "dashes": true, + "color": "#777777", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(ray_node_network_receive_speed{}) by (instance)", + "interval": "", + "legendFormat": "Recv: {{instance}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(ray_node_network_send_speed{}) by (instance)", + "interval": "", + "legendFormat": "Send: {{instance}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Node network", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:628", + "format": "Bps", + "label": "", + "logBase": 1, 
+ "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:629", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "rayVersion:2.9.0" + ], + "templating": { + "list": [ + { + "current": { + "selected": false + }, + "description": "Filter queries of a specific Prometheus type.", + "hide": 2, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${datasource}", + "definition": "label_values(ray_serve_deployment_replica_healthy{}, application)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Application", + "options": [], + "query": { + "query": "label_values(ray_serve_deployment_replica_healthy{}, application)", + "refId": "Prometheus-Instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${datasource}", + "definition": "label_values(ray_serve_num_http_requests{}, route)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "HTTP Route", + "multi": true, + "name": "HTTP_Route", + "options": [], + "query": { + "query": "label_values(ray_serve_num_http_requests{}, route)", + "refId": "Prometheus-Instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", 
+ "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${datasource}", + "definition": "label_values(ray_serve_num_grpc_requests{}, method)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "gRPC Service Method", + "multi": true, + "name": "gRPC_Method", + "options": [], + "query": { + "query": "label_values(ray_serve_num_grpc_requests{}, method)", + "refId": "Prometheus-Instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "rayMeta": [ + "excludesSystemRoutes", + "supportsGlobalFilterOverride" + ], + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Serve Dashboard", + "uid": "rayServeDashboard", + "version": 1 +} diff --git a/config/prometheus/podMonitor.yaml b/config/prometheus/podMonitor.yaml index e20f1367d2e..5af17a3fe12 100644 --- a/config/prometheus/podMonitor.yaml +++ b/config/prometheus/podMonitor.yaml @@ -19,3 +19,45 @@ spec: # A list of endpoints allowed as part of this PodMonitor. podMetricsEndpoints: - port: metrics + relabelings: + - sourceLabels: [__meta_kubernetes_pod_label_ray_io_cluster] + targetLabel: ray_io_cluster +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + labels: + # `release: $HELM_RELEASE`: Prometheus can only detect PodMonitor with this label. + release: prometheus + name: ray-head-monitor + namespace: prometheus-system +spec: + jobLabel: ray-head + # Only select Kubernetes Pods in the "default" namespace. + namespaceSelector: + matchNames: + - default + # Only select Kubernetes Pods with "matchLabels". + selector: + matchLabels: + ray.io/node-type: head + # A list of endpoints allowed as part of this PodMonitor. 
+ podMetricsEndpoints: + - port: metrics + relabelings: + - action: replace + sourceLabels: + - __meta_kubernetes_pod_label_ray_io_cluster + targetLabel: ray_io_cluster + - port: as-metrics # autoscaler metrics + relabelings: + - action: replace + sourceLabels: + - __meta_kubernetes_pod_label_ray_io_cluster + targetLabel: ray_io_cluster + - port: dash-metrics # dashboard metrics + relabelings: + - action: replace + sourceLabels: + - __meta_kubernetes_pod_label_ray_io_cluster + targetLabel: ray_io_cluster diff --git a/config/prometheus/rules/prometheusRules.yaml b/config/prometheus/rules/prometheusRules.yaml index 2e26d4448fe..81961554292 100644 --- a/config/prometheus/rules/prometheusRules.yaml +++ b/config/prometheus/rules/prometheusRules.yaml @@ -39,4 +39,3 @@ spec: for: 5m labels: severity: critical - diff --git a/config/prometheus/serviceMonitor.yaml b/config/prometheus/serviceMonitor.yaml deleted file mode 100644 index 71cd2666f95..00000000000 --- a/config/prometheus/serviceMonitor.yaml +++ /dev/null @@ -1,26 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: ray-head-monitor - namespace: prometheus-system - labels: - # `release: $HELM_RELEASE`: Prometheus can only detect ServiceMonitor with this label. - release: prometheus -spec: - jobLabel: ray-head - # Only select Kubernetes Services in the "default" namespace. - namespaceSelector: - matchNames: - - default - # Only select Kubernetes Services with "matchLabels". - selector: - matchLabels: - ray.io/node-type: head - # A list of endpoints allowed as part of this ServiceMonitor. 
- endpoints: - - port: metrics - - port: as-metrics # autoscaler metrics - - port: dash-metrics # dashboard metrics - targetLabels: - - ray.io/cluster - diff --git a/docs/best-practice/worker-head-reconnection.md b/docs/best-practice/worker-head-reconnection.md index 9a67c24b6e3..a6ee46d4551 100644 --- a/docs/best-practice/worker-head-reconnection.md +++ b/docs/best-practice/worker-head-reconnection.md @@ -2,12 +2,12 @@ ## Problem -For a `RayCluster` with a head and several workers, if a worker is crashed, it will be relaunched immediately and re-join the same cluster quickly; however, when the head is crashed, it will run into the issue [#104](https://github.com/ray-project/kuberay/issues/104) that all worker nodes are lost from the head for a long period of time. +For a `RayCluster` with a head and several workers, if a worker crashes, it is relaunched immediately and quickly rejoins the same cluster; however, when the head crashes, the cluster runs into issue [#104](https://github.com/ray-project/kuberay/issues/104): all worker nodes are lost from the head for a long period of time. ## Explanation > **note** -It was an issue that only happened with old version In the Kuberay version under 0.3.0, we recommand you try the latest version +This issue only occurs in old KubeRay versions (below 0.3.0). We recommend that you try the latest version. When the head pod was deleted, it will be recreated with a new IP by KubeRay controller,and the GCS server address is changed accordingly. The Raylets of all workers will try to get GCS address from Redis in `ReconnectGcsServer`, but the redis_clients always use the previous head IP, so they will always fail to get new GCS address. The Raylets will not exit until max retries are reached. There are two configurations determining this long delay: @@ -30,6 +30,6 @@ We recommend using the latest version of KubeRay. After version 0.5.0, the GCS F For older version (Kuberay <=0.4.0, ray <=2.1.0).
To reduce the chances of a lost worker-head connection, there are two other options: -- Make head more stable: when creating the cluster, allocate sufficient amount of resources on head pod such that it tends to be stable and not easy to crash. You can also set {"num-cpus": "0"} in "rayStartParams" of "headGroupSpec" such that Ray scheduler will skip the head node when scheduling workloads. This also helps to maintain the stability of the head. +- Make head more stable: when creating the cluster, allocate sufficient amount of resources on head pod such that it tends to be stable and not easy to crash. You can also set {"num-cpus": "0"} in "rayStartParams" of "headGroupSpec" such that Ray scheduler will skip the head node when scheduling workloads. This also helps to maintain the stability of the head. -- Make reconnection shorter: for version <= 1.9.1, you can set this head param --system-config='{"ping_gcs_rpc_server_max_retries": 20}' to reduce the delay from 600s down to 20s before workers reconnect to the new head. +- Make reconnection shorter: for version <= 1.9.1, you can set this head param --system-config='{"ping_gcs_rpc_server_max_retries": 20}' to reduce the delay from 600s down to 20s before workers reconnect to the new head. 
diff --git a/docs/components/cli.md b/docs/components/cli.md deleted file mode 120000 index 63612d092b4..00000000000 --- a/docs/components/cli.md +++ /dev/null @@ -1 +0,0 @@ -../../cli/README.md \ No newline at end of file diff --git a/docs/components/config/samples/ray-cluster.autoscaler.large.yaml b/docs/components/config/samples/ray-cluster.autoscaler.large.yaml deleted file mode 120000 index ccb43b0932b..00000000000 --- a/docs/components/config/samples/ray-cluster.autoscaler.large.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../ray-operator/config/samples/ray-cluster.autoscaler.large.yaml \ No newline at end of file diff --git a/docs/components/config/samples/ray-cluster.autoscaler.yaml b/docs/components/config/samples/ray-cluster.autoscaler.yaml deleted file mode 120000 index 47bcf4cb0a9..00000000000 --- a/docs/components/config/samples/ray-cluster.autoscaler.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../ray-operator/config/samples/ray-cluster.autoscaler.yaml \ No newline at end of file diff --git a/docs/components/config/samples/ray-cluster.complete.large.yaml b/docs/components/config/samples/ray-cluster.complete.large.yaml deleted file mode 120000 index d6e2b491222..00000000000 --- a/docs/components/config/samples/ray-cluster.complete.large.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../ray-operator/config/samples/ray-cluster.complete.large.yaml \ No newline at end of file diff --git a/docs/components/config/samples/ray-cluster.complete.yaml b/docs/components/config/samples/ray-cluster.complete.yaml deleted file mode 120000 index d39a98d4d83..00000000000 --- a/docs/components/config/samples/ray-cluster.complete.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../ray-operator/config/samples/ray-cluster.complete.yaml \ No newline at end of file diff --git a/docs/components/config/samples/ray-cluster.heterogeneous.yaml b/docs/components/config/samples/ray-cluster.heterogeneous.yaml deleted file mode 120000 index a6d7d2c2463..00000000000 --- 
a/docs/components/config/samples/ray-cluster.heterogeneous.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../ray-operator/config/samples/ray-cluster.heterogeneous.yaml \ No newline at end of file diff --git a/docs/components/config/samples/ray-cluster.mini.yaml b/docs/components/config/samples/ray-cluster.mini.yaml deleted file mode 120000 index 664c2862e3d..00000000000 --- a/docs/components/config/samples/ray-cluster.mini.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../ray-operator/config/samples/ray-cluster.mini.yaml \ No newline at end of file diff --git a/docs/components/pythonapiclient.md b/docs/components/pythonapiclient.md new file mode 120000 index 00000000000..8851f196625 --- /dev/null +++ b/docs/components/pythonapiclient.md @@ -0,0 +1 @@ +../../clients/python-apiserver-client/README.md \ No newline at end of file diff --git a/docs/components/pythonclient.md b/docs/components/pythonclient.md new file mode 120000 index 00000000000..75c3f6bf274 --- /dev/null +++ b/docs/components/pythonclient.md @@ -0,0 +1 @@ +../../clients/python-client/README.md \ No newline at end of file diff --git a/docs/design/protobuf-grpc-service.md b/docs/design/protobuf-grpc-service.md index a8ddaf6b20f..411314a8c7a 100644 --- a/docs/design/protobuf-grpc-service.md +++ b/docs/design/protobuf-grpc-service.md @@ -44,7 +44,7 @@ In order to better define resources at the API level, a few proto files will be - Some of the Kubernetes API like `tolerance` and `node affinity` are too complicated to be converted to an API. - We want to leave some flexibility to use database to store history data in the near future (for example, pagination, list options etc). -To resolve these issues, we provide a simple API which can cover most common use-cases. +To resolve these issues, we provide a simple API which can cover most common use-cases. For example, the protobuf definition of the `RayCluster`: @@ -92,7 +92,7 @@ service ClusterService { message CreateClusterRequest { // The cluster to be created. 
Cluster cluster = 1; - // The namespace of the cluster to be created. + // The namespace of the cluster to be created. string namespace = 2; } @@ -149,7 +149,7 @@ message Cluster { PRODUCTION = 3; } Environment environment = 5; - + // Required field. This field indicates ray cluster configuration ClusterSpec cluster_spec = 6; @@ -164,7 +164,7 @@ message Cluster { // Output. The list related to the cluster. repeated ClusterEvent events = 10; - + // Output. The service endpoint of the cluster map service_endpoint = 11; @@ -189,8 +189,8 @@ message Volume { string name = 3; string source = 4; bool read_only = 5; - - // If indicate hostpath, we need to let user indicate which type + + // If indicate hostpath, we need to let user indicate which type // they would like to use. enum HostPathType { DIRECTORY = 0; @@ -226,11 +226,11 @@ message WorkerGroupSpec { string compute_template = 2; // Optional field. This field will be used to retrieve right ray container string image = 3; - // Required. Desired replicas of the worker group + // Required. Desired replicas of the worker group int32 replicas = 4; - // Optional. Min replicas of the worker group + // Optional. Min replicas of the worker group int32 min_replicas = 5; - // Optional. Max replicas of the worker group + // Optional. Max replicas of the worker group int32 max_replicas = 6; // Optional. The ray start parames of worker node group map ray_start_params = 7; @@ -245,7 +245,7 @@ message ClusterEvent { // Output. Human readable name for event. string name = 2; - // Output. The creation time of the event. + // Output. The creation time of the event. google.protobuf.Timestamp created_at = 3; // Output. The last time the event occur. @@ -262,7 +262,7 @@ message ClusterEvent { // Output. Type of this event (Normal, Warning), new types could be added in the future string type = 8; - + // Output. The number of times this event has occurred. 
int32 count = 9; } diff --git a/docs/development/development.md b/docs/development/development.md index d29966d214b..9c1a81e81c7 100644 --- a/docs/development/development.md +++ b/docs/development/development.md @@ -13,10 +13,8 @@ To learn more about developing and testing the KubeRay Operator, please refer to The KubeRay APIServer is a central component that exposes the KubeRay API for managing Ray clusters. For more information about developing and testing the KubeRay APIServer, please refer to the [APIServer Development Guide](https://github.com/ray-project/kuberay/blob/master/apiserver/DEVELOPMENT.md). -## KubeRay CLI - -The KubeRay CLI is a command-line interface for interacting with Ray clusters managed by KubeRay. -For more information about developing and testing the KubeRay CLI, please refer to the [CLI Development Guide](https://github.com/ray-project/kuberay/blob/master/cli/README.md). +## KubeRay Python client +The KubeRay Python client library provides APIs to handle RayCluster from your Python application. For more information about developing and testing the KubeRay Python client, please refer to the [Python Client](https://github.com/ray-project/kuberay/blob/master/docs/components/pythonclient.md) and [Python API Client](https://github.com/ray-project/kuberay/blob/master/docs/components/pythonapiclient.md). ## Proto and OpenAPI diff --git a/docs/development/release.md b/docs/development/release.md index f44aec43591..22aa20f5026 100644 --- a/docs/development/release.md +++ b/docs/development/release.md @@ -25,7 +25,7 @@ KubeRay release plans to synchronize with every two Ray releases. KubeRay v0.5.0 ### Steps -#### Step 0. KubeRay feature freeze +#### Step 0. KubeRay feature freeze Ensure the last master commit you want to release passes the [Go-build-and-test](https://github.com/ray-project/kuberay/actions/workflows/test-job.yaml) workflow before feature freeze. 
@@ -38,16 +38,16 @@ Ensure that the desired master commit is stable by verifying the following: * The example configurations work. During the KubeRay `0.5.0` release, we used spreadsheets to track [manual testing](https://docs.google.com/spreadsheets/d/13q059_lcaKb3BFmOlmZTtOqZPuGPYjRuKPqI1FSpCO8/edit?usp=sharing) and [documentation updates](https://docs.google.com/spreadsheets/d/13q059_lcaKb3BFmOlmZTtOqZPuGPYjRuKPqI1FSpCO8/edit?usp=sharing). Instead of using the latest stable release of KubeRay (i.e., v0.4.0 for the v0.5.0 release process), we should verify the master branch using the following: - + * The nightly KubeRay operator Docker image: `kuberay/operator:nightly`. * The local CRD / YAML / Helm charts. -Open PRs to track the progress of manual testing for documentation, but **avoid merging these PRs** until the Docker images and Helm charts for v0.5.0 are available +Open PRs to track the progress of manual testing for documentation, but **avoid merging these PRs** until the Docker images and Helm charts for v0.5.0 are available (example PRs: [#997](https://github.com/ray-project/kuberay/pull/997), [#999](https://github.com/ray-project/kuberay/pull/999), [#1004](https://github.com/ray-project/kuberay/pull/1004), [#1012](https://github.com/ray-project/kuberay/pull/1012)). Bug fix pull requests to fix bugs which found in the documentation testing process **can be merged** (example PR: [#1000](https://github.com/ray-project/kuberay/pull/1000)). Manual testing can be time-consuming, and to relieve the workload, we plan to add more CI tests. The minimum requirements to move forward are: - + * All example configurations can work with `kuberay/operator:nightly` and the latest release of Ray (i.e. 2.3.0 for KubeRay v0.5.0). * Update all version strings in the documents. @@ -112,7 +112,7 @@ You will be prompted for a commit reference and an image tag. The commit referen #### Step 6. 
Merge open PRs in step 1 and post-release PRs -Now, we have the Docker images and Helm charts for v0.5.0. +Now, we have the Docker images and Helm charts for v0.5.0. * Merge the pull requests in Step 1 (i.e. [#997](https://github.com/ray-project/kuberay/pull/997), [#999](https://github.com/ray-project/kuberay/pull/999), [#1004](https://github.com/ray-project/kuberay/pull/1004), [#1012](https://github.com/ray-project/kuberay/pull/1012)) @@ -128,10 +128,8 @@ Now, we have the Docker images and Helm charts for v0.5.0. #### Step 8. Generate release -* Click "Create release" to create release for the tag v0.5.0 ([link](https://github.com/ray-project/kuberay/tags)). - * Update `VERSION` in the [Makefile](https://github.com/ray-project/kuberay/blob/master/cli/Makefile) to `vX.Y.Z`. - * Run `make release` in cli folder and generate `kuberay-$VERSION-darwin-amd64.zip` and `kuberay-$VERSION-linux-amd64.zip` files. Upload them to the GitHub release. - * Follow the [instructions](../release/changelog.md) to generate release notes and add notes in the GitHub release. +* Currently, a GitHub release will be generated by CI automatically if a semver tag is pushed to the repository. +* Follow the [instructions](../release/changelog.md) to generate release notes and add notes in the GitHub release. #### Step 9. Announce the release on the KubeRay slack! @@ -144,4 +142,3 @@ Now, we have the Docker images and Helm charts for v0.5.0. #### Step 11. Update and improve this release document! * Update this document and optimize the release process! - diff --git a/docs/guidance/rayStartParams.md b/docs/guidance/rayStartParams.md index 15bc76217ae..28d2a63012c 100644 --- a/docs/guidance/rayStartParams.md +++ b/docs/guidance/rayStartParams.md @@ -16,8 +16,8 @@ The default value for both Ray and KubeRay 0.5.0 is `localhost`. Please note tha - `--port`: Port for the GCS server. The port is set to `6379` by default. 
Please ensure that this value matches the `gcs-server` container port in Ray head container. -- `--redis-password`: Redis password for an external Redis, necessary when [fault tolerance](https://github.com/ray-project/kuberay/blob/master/docs/guidance/gcs-ft.md) is enabled. -The default value is `""` after Ray 2.3.0. See [#929](https://github.com/ray-project/kuberay/pull/929) for more details. +- `--redis-password`: Redis password for an external Redis, necessary when [fault tolerance](https://github.com/ray-project/kuberay/blob/master/docs/guidance/gcs-ft.md) is enabled. +The default value is `""` after Ray 2.3.0. See [#929](https://github.com/ray-project/kuberay/pull/929) for more details. ### Options Exclusive to the worker Pods @@ -34,5 +34,3 @@ The default value is `""` after Ray 2.3.0. See [#929](https://github.com/ray-pro - `--num-cpus`: Number of logical CPUs on this Ray node. Default is determined by Ray container resource limits. Modify Ray container resource limits instead of this option. See [PR #170](https://github.com/ray-project/kuberay/pull/170). However, it is sometimes useful to override this autodetected value. For example, setting `num-cpus:"0"` for the Ray head pod will prevent Ray workloads with non-zero CPU requirements from being scheduled on the head. - `--num-gpus`: Number of GPUs on this Ray node. Default is determined by Ray container resource limits. Modify Ray container resource limits instead of this option. See [PR #170](https://github.com/ray-project/kuberay/pull/170). - - diff --git a/docs/guidance/rayservice-high-availability.md b/docs/guidance/rayservice-high-availability.md index a00d48ac2b5..aa916615aec 100644 --- a/docs/guidance/rayservice-high-availability.md +++ b/docs/guidance/rayservice-high-availability.md @@ -136,4 +136,4 @@ In your locust terminal, You will see the failed rate is 0.00%. 
```sh kubectl delete -f ./ray-operator/config/samples/ray-service.high-availability-locust.yaml kind delete cluster -``` \ No newline at end of file +``` diff --git a/docs/index.md b/docs/index.md index a5cb78144b4..3c68e732da6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -27,7 +27,7 @@ KubeRay is a powerful, open-source Kubernetes operator that simplifies the deployment and management of [Ray](https://github.com/ray-project/ray) applications on Kubernetes. It offers several key components: **KubeRay core**: This is the official, fully-maintained component of KubeRay that provides three custom resource definitions, RayCluster, RayJob, and RayService. These resources are designed to help you run a wide range of workloads with ease. - + * **RayCluster**: KubeRay fully manages the lifecycle of RayCluster, including cluster creation/deletion, autoscaling, and ensuring fault tolerance. * **RayJob**: With RayJob, KubeRay automatically creates a RayCluster and submits a job when the cluster is ready. You can also configure RayJob to automatically delete the RayCluster once the job finishes. @@ -41,16 +41,14 @@ by some organizations to back user interfaces for KubeRay resource management. * **KubeRay Python client**: This Python client library provides APIs to handle RayCluster from your Python application. -* **KubeRay CLI**: KubeRay CLI provides the ability to manage KubeRay resources through command-line interface. 
- ## KubeRay ecosystem -* [AWS Application Load Balancer](guidance/ingress/#example-aws-application-load-balancer-alb-ingress-support-on-aws-eks) -* [Nginx](guidance/ingress/#example-manually-setting-up-nginx-ingress-on-kind) -* [Prometheus and Grafana](guidance/prometheus-grafana/) -* [Volcano](guidance/volcano-integration/) -* [MCAD](guidance/kuberay-with-MCAD/) -* [Kubeflow](guidance/kubeflow-integration/) +* [AWS Application Load Balancer](https://docs.ray.io/en/master/cluster/kubernetes/k8s-ecosystem/ingress.html#aws-application-load-balancer-alb-ingress-support-on-aws-eks) +* [Nginx](https://docs.ray.io/en/master/cluster/kubernetes/k8s-ecosystem/ingress.html#manually-setting-up-nginx-ingress-on-kind) +* [Prometheus and Grafana](https://docs.ray.io/en/master/cluster/kubernetes/k8s-ecosystem/prometheus-grafana.html#kuberay-prometheus-grafana) +* [Volcano](https://docs.ray.io/en/master/cluster/kubernetes/k8s-ecosystem/volcano.html) +* [MCAD](guidance/kuberay-with-MCAD.md) +* [Kubeflow](https://docs.ray.io/en/master/cluster/kubernetes/k8s-ecosystem/kubeflow.html) ## Security diff --git a/docs/reference/api.md b/docs/reference/api.md index a985e7c8a5c..f2732b22a22 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -22,50 +22,77 @@ Package v1 contains API Schema definitions for the ray v1 API group AutoscalerOptions specifies optional configuration for the Ray autoscaler. + + _Appears in:_ - [RayClusterSpec](#rayclusterspec) -| Field | Description | -| --- | --- | -| `resources` _[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#resourcerequirements-v1-core)_ | Resources specifies optional resource request and limit overrides for the autoscaler container. Default values: 500m CPU request and limit. 512Mi memory request and limit. | -| `image` _string_ | Image optionally overrides the autoscaler's container image. This override is for provided for autoscaler testing and development. 
| -| `imagePullPolicy` _[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#pullpolicy-v1-core)_ | ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is for provided for autoscaler testing and development. | -| `env` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Optional list of environment variables to set in the autoscaler container. | -| `envFrom` _[EnvFromSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envfromsource-v1-core) array_ | Optional list of sources to populate environment variables in the autoscaler container. | -| `volumeMounts` _[VolumeMount](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#volumemount-v1-core) array_ | Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container. | -| `securityContext` _[SecurityContext](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#securitycontext-v1-core)_ | SecurityContext defines the security options the container should be run with. If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ | -| `idleTimeoutSeconds` _integer_ | IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources. Defaults to 60 (one minute). It is not read by the KubeRay operator but by the Ray autoscaler. | -| `upscalingMode` _[UpscalingMode](#upscalingmode)_ | UpscalingMode is "Conservative", "Default", or "Aggressive." Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster. Default: Upscaling is not rate-limited. Aggressive: An alias for Default; upscaling is not rate-limited. It is not read by the KubeRay operator but by the Ray autoscaler. 
| +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `resources` _[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#resourcerequirements-v1-core)_ | Resources specifies optional resource request and limit overrides for the autoscaler container.
Default values: 500m CPU request and limit. 512Mi memory request and limit. | | | +| `image` _string_ | Image optionally overrides the autoscaler's container image. This override is provided for autoscaler testing and development. | | | +| `imagePullPolicy` _[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#pullpolicy-v1-core)_ | ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is provided for autoscaler testing and development. | | | +| `securityContext` _[SecurityContext](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#securitycontext-v1-core)_ | SecurityContext defines the security options the container should be run with.
If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext.
More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ | | | +| `idleTimeoutSeconds` _integer_ | IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
Defaults to 60 (one minute). It is not read by the KubeRay operator but by the Ray autoscaler. | | | +| `upscalingMode` _[UpscalingMode](#upscalingmode)_ | UpscalingMode is "Conservative", "Default", or "Aggressive."
Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster.
Default: Upscaling is not rate-limited.
Aggressive: An alias for Default; upscaling is not rate-limited.
It is not read by the KubeRay operator but by the Ray autoscaler. | | Enum: [Default Aggressive Conservative]
| +| `env` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Optional list of environment variables to set in the autoscaler container. | | | +| `envFrom` _[EnvFromSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envfromsource-v1-core) array_ | Optional list of sources to populate environment variables in the autoscaler container. | | | +| `volumeMounts` _[VolumeMount](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#volumemount-v1-core) array_ | Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container. | | | +#### DeletionPolicy +_Underlying type:_ _string_ -#### HeadGroupSpec -HeadGroupSpec are the spec for the head pod + +_Appears in:_ +- [RayJobSpec](#rayjobspec) + + + + + +#### GcsFaultToleranceOptions + + + +GcsFaultToleranceOptions contains configs for GCS FT + + _Appears in:_ - [RayClusterSpec](#rayclusterspec) -| Field | Description | -| --- | --- | -| `serviceType` _[ServiceType](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#servicetype-v1-core)_ | ServiceType is Kubernetes service type of the head service. it will be used by the workers to connect to the head pod | -| `headService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | HeadService is the Kubernetes service of the head pod. | -| `enableIngress` _boolean_ | EnableIngress indicates whether operator should create ingress object for head service or not. | -| `rayStartParams` _object (keys:string, values:string)_ | RayStartParams are the params of the start command: node-manager-port, object-store-memory, ... | -| `template` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | Template is the exact pod template used in K8s depoyments, statefulsets, etc. 
| +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `redisUsername` _[RedisCredential](#rediscredential)_ | | | | +| `redisPassword` _[RedisCredential](#rediscredential)_ | | | | +| `externalStorageNamespace` _string_ | | | | +| `redisAddress` _string_ | | | | -#### JobFailedReason +#### HeadGroupSpec + + + +HeadGroupSpec are the spec for the head pod -_Underlying type:_ _string_ -JobFailedReason indicates the reason the RayJob changes its JobDeploymentStatus to 'Failed' _Appears in:_ -- [RayJobStatus](#rayjobstatus) +- [RayClusterSpec](#rayclusterspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `serviceType` _[ServiceType](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#servicetype-v1-core)_ | ServiceType is Kubernetes service type of the head service. It will be used by the workers to connect to the head pod | | | +| `headService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | HeadService is the Kubernetes service of the head pod. | | | +| `enableIngress` _boolean_ | EnableIngress indicates whether operator should create ingress object for head service or not. | | | +| `rayStartParams` _object (keys:string, values:string)_ | RayStartParams are the params of the start command: node-manager-port, object-store-memory, ... | | | +| `template` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | Template is the exact pod template used in K8s deployments, statefulsets, etc. 
| | | + @@ -75,6 +102,8 @@ _Underlying type:_ _string_ + + _Appears in:_ - [RayJobSpec](#rayjobspec) @@ -88,12 +117,16 @@ RayCluster is the Schema for the RayClusters API -| Field | Description | -| --- | --- | -| `apiVersion` _string_ | `ray.io/v1` -| `kind` _string_ | `RayCluster` -| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | -| `spec` _[RayClusterSpec](#rayclusterspec)_ | Specification of the desired behavior of the RayCluster. | + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `ray.io/v1` | | | +| `kind` _string_ | `RayCluster` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[RayClusterSpec](#rayclusterspec)_ | Specification of the desired behavior of the RayCluster. 
| | | + + #### RayClusterSpec @@ -102,20 +135,24 @@ RayCluster is the Schema for the RayClusters API RayClusterSpec defines the desired state of RayCluster + + _Appears in:_ - [RayCluster](#raycluster) - [RayJobSpec](#rayjobspec) - [RayServiceSpec](#rayservicespec) -| Field | Description | -| --- | --- | -| `headGroupSpec` _[HeadGroupSpec](#headgroupspec)_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster Important: Run "make" to regenerate code after modifying this file HeadGroupSpecs are the spec for the head pod | -| `workerGroupSpecs` _[WorkerGroupSpec](#workergroupspec) array_ | WorkerGroupSpecs are the specs for the worker pods | -| `rayVersion` _string_ | RayVersion is used to determine the command for the Kubernetes Job managed by RayJob | -| `enableInTreeAutoscaling` _boolean_ | EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs | -| `autoscalerOptions` _[AutoscalerOptions](#autoscaleroptions)_ | AutoscalerOptions specifies optional configuration for the Ray autoscaler. | -| `headServiceAnnotations` _object (keys:string, values:string)_ | | -| `suspend` _boolean_ | Suspend indicates whether a RayCluster should be suspended. A suspended RayCluster will have head pods and worker pods deleted. | +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `suspend` _boolean_ | Suspend indicates whether a RayCluster should be suspended.
A suspended RayCluster will have head pods and worker pods deleted. | | | +| `managedBy` _string_ | ManagedBy is an optional configuration for the controller or entity that manages a RayCluster.
The value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'.
The kuberay-operator reconciles a RayCluster which doesn't have this field at all or
the field value is the reserved string 'ray.io/kuberay-operator',
but delegates reconciling the RayCluster with 'kueue.x-k8s.io/multikueue' to the Kueue.
The field is immutable. | | | +| `autoscalerOptions` _[AutoscalerOptions](#autoscaleroptions)_ | AutoscalerOptions specifies optional configuration for the Ray autoscaler. | | | +| `headServiceAnnotations` _object (keys:string, values:string)_ | | | | +| `enableInTreeAutoscaling` _boolean_ | EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs | | | +| `gcsFaultToleranceOptions` _[GcsFaultToleranceOptions](#gcsfaulttoleranceoptions)_ | GcsFaultToleranceOptions for enabling GCS FT | | | +| `headGroupSpec` _[HeadGroupSpec](#headgroupspec)_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
Important: Run "make" to regenerate code after modifying this file
HeadGroupSpecs are the spec for the head pod | | | +| `rayVersion` _string_ | RayVersion is used to determine the command for the Kubernetes Job managed by RayJob | | | +| `workerGroupSpecs` _[WorkerGroupSpec](#workergroupspec) array_ | WorkerGroupSpecs are the specs for the worker pods | | | #### RayJob @@ -126,12 +163,14 @@ RayJob is the Schema for the rayjobs API -| Field | Description | -| --- | --- | -| `apiVersion` _string_ | `ray.io/v1` -| `kind` _string_ | `RayJob` -| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | -| `spec` _[RayJobSpec](#rayjobspec)_ | | + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `ray.io/v1` | | | +| `kind` _string_ | `RayJob` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[RayJobSpec](#rayjobspec)_ | | | | #### RayJobSpec @@ -140,26 +179,32 @@ RayJob is the Schema for the rayjobs API RayJobSpec defines the desired state of RayJob + + _Appears in:_ - [RayJob](#rayjob) -| Field | Description | -| --- | --- | -| `entrypoint` _string_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster Important: Run "make" to regenerate code after modifying this file | -| `metadata` _object (keys:string, values:string)_ | Refer to Kubernetes API documentation for fields of `metadata`. | -| `runtimeEnvYAML` _string_ | RuntimeEnvYAML represents the runtime environment configuration provided as a multi-line YAML string. | -| `jobId` _string_ | If jobId is not set, a new jobId will be auto-generated. | -| `shutdownAfterJobFinishes` _boolean_ | ShutdownAfterJobFinishes will determine whether to delete the ray cluster once rayJob succeed or failed. 
| -| `ttlSecondsAfterFinished` _integer_ | TTLSecondsAfterFinished is the TTL to clean up RayCluster. It's only working when ShutdownAfterJobFinishes set to true. | -| `activeDeadlineSeconds` _integer_ | ActiveDeadlineSeconds is the duration in seconds that the RayJob may be active before KubeRay actively tries to terminate the RayJob; value must be positive integer. | -| `rayClusterSpec` _[RayClusterSpec](#rayclusterspec)_ | RayClusterSpec is the cluster template to run the job | -| `clusterSelector` _object (keys:string, values:string)_ | clusterSelector is used to select running rayclusters by labels | -| `submissionMode` _[JobSubmissionMode](#jobsubmissionmode)_ | SubmissionMode specifies how RayJob submits the Ray job to the RayCluster. In "K8sJobMode", the KubeRay operator creates a submitter Kubernetes Job to submit the Ray job. In "HTTPMode", the KubeRay operator sends a request to the RayCluster to create a Ray job. | -| `suspend` _boolean_ | suspend specifies whether the RayJob controller should create a RayCluster instance If a job is applied with the suspend field set to true, the RayCluster will not be created and will wait for the transition to false. If the RayCluster is already created, it will be deleted. In case of transition to false a new RayCluster will be created. | -| `submitterPodTemplate` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | SubmitterPodTemplate is the template for the pod that will run `ray job submit`. | -| `entrypointNumCpus` _float_ | EntrypointNumCpus specifies the number of cpus to reserve for the entrypoint command. | -| `entrypointNumGpus` _float_ | EntrypointNumGpus specifies the number of gpus to reserve for the entrypoint command. | -| `entrypointResources` _string_ | EntrypointResources specifies the custom resources and quantities to reserve for the entrypoint command. 
| +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `activeDeadlineSeconds` _integer_ | ActiveDeadlineSeconds is the duration in seconds that the RayJob may be active before
KubeRay actively tries to terminate the RayJob; value must be a positive integer. | | | +| `backoffLimit` _integer_ | Specifies the number of retries before marking this job failed.
Each retry creates a new RayCluster. | 0 | | +| `rayClusterSpec` _[RayClusterSpec](#rayclusterspec)_ | RayClusterSpec is the cluster template to run the job | | | +| `submitterPodTemplate` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | SubmitterPodTemplate is the template for the pod that will run `ray job submit`. | | | +| `metadata` _object (keys:string, values:string)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `clusterSelector` _object (keys:string, values:string)_ | clusterSelector is used to select running rayclusters by labels | | | +| `submitterConfig` _[SubmitterConfig](#submitterconfig)_ | Configurations of submitter k8s job. | | | +| `managedBy` _string_ | ManagedBy is an optional configuration for the controller or entity that manages a RayJob.
The value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'.
The kuberay-operator reconciles a RayJob which doesn't have this field at all or
the field value is the reserved string 'ray.io/kuberay-operator',
but delegates reconciling the RayJob with 'kueue.x-k8s.io/multikueue' to the Kueue.
The field is immutable. | | | +| `deletionPolicy` _[DeletionPolicy](#deletionpolicy)_ | DeletionPolicy indicates what resources of the RayJob are deleted upon job completion.
Valid values are 'DeleteCluster', 'DeleteWorkers', 'DeleteSelf' or 'DeleteNone'.
If unset, deletion policy is based on 'spec.shutdownAfterJobFinishes'.
This field requires the RayJobDeletionPolicy feature gate to be enabled. | | | +| `entrypoint` _string_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
Important: Run "make" to regenerate code after modifying this file | | | +| `runtimeEnvYAML` _string_ | RuntimeEnvYAML represents the runtime environment configuration
provided as a multi-line YAML string. | | | +| `jobId` _string_ | If jobId is not set, a new jobId will be auto-generated. | | | +| `submissionMode` _[JobSubmissionMode](#jobsubmissionmode)_ | SubmissionMode specifies how RayJob submits the Ray job to the RayCluster.
In "K8sJobMode", the KubeRay operator creates a submitter Kubernetes Job to submit the Ray job.
In "HTTPMode", the KubeRay operator sends a request to the RayCluster to create a Ray job.
In "InteractiveMode", the KubeRay operator waits for a user to submit a job to the Ray cluster. | K8sJobMode | | +| `entrypointResources` _string_ | EntrypointResources specifies the custom resources and quantities to reserve for the
entrypoint command. | | | +| `entrypointNumCpus` _float_ | EntrypointNumCpus specifies the number of cpus to reserve for the entrypoint command. | | | +| `entrypointNumGpus` _float_ | EntrypointNumGpus specifies the number of gpus to reserve for the entrypoint command. | | | +| `ttlSecondsAfterFinished` _integer_ | TTLSecondsAfterFinished is the TTL to clean up RayCluster.
It only takes effect when ShutdownAfterJobFinishes is set to true. | 0 | | +| `shutdownAfterJobFinishes` _boolean_ | ShutdownAfterJobFinishes determines whether to delete the Ray cluster once the RayJob succeeds or fails. | | | +| `suspend` _boolean_ | suspend specifies whether the RayJob controller should create a RayCluster instance.
If a job is applied with the suspend field set to true,
the RayCluster will not be created and will wait for the transition to false.
If the RayCluster is already created, it will be deleted.
In case of transition to false a new RayCluster will be created. | | | @@ -172,12 +217,14 @@ RayService is the Schema for the rayservices API -| Field | Description | -| --- | --- | -| `apiVersion` _string_ | `ray.io/v1` -| `kind` _string_ | `RayService` -| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | -| `spec` _[RayServiceSpec](#rayservicespec)_ | | + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `ray.io/v1` | | | +| `kind` _string_ | `RayService` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[RayServiceSpec](#rayservicespec)_ | | | | #### RayServiceSpec @@ -186,18 +233,68 @@ RayService is the Schema for the rayservices API RayServiceSpec defines the desired state of RayService + + _Appears in:_ - [RayService](#rayservice) -| Field | Description | -| --- | --- | -| `serveConfigV2` _string_ | Important: Run "make" to regenerate code after modifying this file Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. | -| `rayClusterConfig` _[RayClusterSpec](#rayclusterspec)_ | | -| `serviceUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | -| `deploymentUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | -| `serveService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. 
| +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `serviceUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | | | +| `deploymentUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | | | +| `serveService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. | | | +| `upgradeStrategy` _[RayServiceUpgradeStrategy](#rayserviceupgradestrategy)_ | UpgradeStrategy defines the scaling policy used when upgrading the RayService. | | | +| `serveConfigV2` _string_ | Important: Run "make" to regenerate code after modifying this file
Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. | | | +| `rayClusterConfig` _[RayClusterSpec](#rayclusterspec)_ | | | | +| `excludeHeadPodFromServeSvc` _boolean_ | If the field is set to true, the value of the label `ray.io/serve` on the head Pod should always be false.
Therefore, the head Pod's endpoint will not be added to the Kubernetes Serve service. | | | + + + + +#### RayServiceUpgradeStrategy + + + + + + + +_Appears in:_ +- [RayServiceSpec](#rayservicespec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `type` _[RayServiceUpgradeType](#rayserviceupgradetype)_ | Type represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None`. | | | +#### RayServiceUpgradeType + +_Underlying type:_ _string_ + + + + + +_Appears in:_ +- [RayServiceUpgradeStrategy](#rayserviceupgradestrategy) + + + +#### RedisCredential + + + +RedisCredential is the redis username/password or a reference to the source containing the username/password + + + +_Appears in:_ +- [GcsFaultToleranceOptions](#gcsfaulttoleranceoptions) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `valueFrom` _[EnvVarSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvarsource-v1-core)_ | | | | +| `value` _string_ | | | | #### ScaleStrategy @@ -206,12 +303,30 @@ _Appears in:_ ScaleStrategy to remove workers + + _Appears in:_ - [WorkerGroupSpec](#workergroupspec) -| Field | Description | -| --- | --- | -| `workersToDelete` _string array_ | WorkersToDelete workers to be deleted | +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `workersToDelete` _string array_ | WorkersToDelete workers to be deleted | | | + + +#### SubmitterConfig + + + + + + + +_Appears in:_ +- [RayJobSpec](#rayjobspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `backoffLimit` _integer_ | BackoffLimit of the submitter k8s job. 
| | | #### UpscalingMode @@ -220,6 +335,9 @@ _Underlying type:_ _string_ +_Validation:_ +- Enum: [Default Aggressive Conservative] + _Appears in:_ - [AutoscalerOptions](#autoscaleroptions) @@ -231,19 +349,23 @@ _Appears in:_ WorkerGroupSpec are the specs for the worker pods + + _Appears in:_ - [RayClusterSpec](#rayclusterspec) -| Field | Description | -| --- | --- | -| `groupName` _string_ | we can have multiple worker groups, we distinguish them by name | -| `replicas` _integer_ | Replicas is the number of desired Pods for this worker group. See https://github.com/ray-project/kuberay/pull/1443 for more details about the reason for making this field optional. | -| `minReplicas` _integer_ | MinReplicas denotes the minimum number of desired Pods for this worker group. | -| `maxReplicas` _integer_ | MaxReplicas denotes the maximum number of desired Pods for this worker group, and the default value is maxInt32. | -| `numOfHosts` _integer_ | NumOfHosts denotes the number of hosts to create per replica. The default value is 1. | -| `rayStartParams` _object (keys:string, values:string)_ | RayStartParams are the params of the start command: address, object-store-memory, ... | -| `template` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | Template is a pod template for the worker | -| `scaleStrategy` _[ScaleStrategy](#scalestrategy)_ | ScaleStrategy defines which pods to remove | +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `suspend` _boolean_ | Suspend indicates whether a worker group should be suspended.
A suspended worker group will have all pods deleted.
This is not a user-facing API and is only used by RayJob DeletionPolicy. | | | +| `groupName` _string_ | we can have multiple worker groups, we distinguish them by name | | | +| `replicas` _integer_ | Replicas is the number of desired Pods for this worker group. See https://github.com/ray-project/kuberay/pull/1443 for more details about the reason for making this field optional. | 0 | | +| `minReplicas` _integer_ | MinReplicas denotes the minimum number of desired Pods for this worker group. | 0 | | +| `maxReplicas` _integer_ | MaxReplicas denotes the maximum number of desired Pods for this worker group, and the default value is maxInt32. | 2147483647 | | +| `idleTimeoutSeconds` _integer_ | IdleTimeoutSeconds denotes the number of seconds to wait before the v2 autoscaler terminates an idle worker pod of this type.
This value is only used with the Ray Autoscaler enabled and defaults to the value set by the AutoscalingConfig if not specified for this worker group. | | | +| `rayStartParams` _object (keys:string, values:string)_ | RayStartParams are the params of the start command: address, object-store-memory, ... | | | +| `template` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | Template is a pod template for the worker | | | +| `scaleStrategy` _[ScaleStrategy](#scalestrategy)_ | ScaleStrategy defines which pods to remove | | | +| `numOfHosts` _integer_ | NumOfHosts denotes the number of hosts to create per replica. The default value is 1. | 1 | | @@ -265,20 +387,22 @@ Package v1alpha1 contains API Schema definitions for the ray v1alpha1 API group AutoscalerOptions specifies optional configuration for the Ray autoscaler. + + _Appears in:_ - [RayClusterSpec](#rayclusterspec) -| Field | Description | -| --- | --- | -| `resources` _[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#resourcerequirements-v1-core)_ | Resources specifies optional resource request and limit overrides for the autoscaler container. Default values: 500m CPU request and limit. 512Mi memory request and limit. | -| `image` _string_ | Image optionally overrides the autoscaler's container image. This override is for provided for autoscaler testing and development. | -| `imagePullPolicy` _[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#pullpolicy-v1-core)_ | ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is for provided for autoscaler testing and development. | -| `env` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Optional list of environment variables to set in the autoscaler container. 
| -| `envFrom` _[EnvFromSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envfromsource-v1-core) array_ | Optional list of sources to populate environment variables in the autoscaler container. | -| `volumeMounts` _[VolumeMount](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#volumemount-v1-core) array_ | Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container. | -| `securityContext` _[SecurityContext](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#securitycontext-v1-core)_ | SecurityContext defines the security options the container should be run with. If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ | -| `idleTimeoutSeconds` _integer_ | IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources. Defaults to 60 (one minute). It is not read by the KubeRay operator but by the Ray autoscaler. | -| `upscalingMode` _[UpscalingMode](#upscalingmode)_ | UpscalingMode is "Conservative", "Default", or "Aggressive." Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster. Default: Upscaling is not rate-limited. Aggressive: An alias for Default; upscaling is not rate-limited. It is not read by the KubeRay operator but by the Ray autoscaler. | +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `resources` _[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#resourcerequirements-v1-core)_ | Resources specifies optional resource request and limit overrides for the autoscaler container.
Default values: 500m CPU request and limit. 512Mi memory request and limit. | | | +| `image` _string_ | Image optionally overrides the autoscaler's container image. This override is provided for autoscaler testing and development. | | | +| `imagePullPolicy` _[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#pullpolicy-v1-core)_ | ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is provided for autoscaler testing and development. | | | +| `securityContext` _[SecurityContext](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#securitycontext-v1-core)_ | SecurityContext defines the security options the container should be run with.
If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext.
More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ | | | +| `idleTimeoutSeconds` _integer_ | IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
Defaults to 60 (one minute). It is not read by the KubeRay operator but by the Ray autoscaler. | | | +| `upscalingMode` _[UpscalingMode](#upscalingmode)_ | UpscalingMode is "Conservative", "Default", or "Aggressive."
Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster.
Default: Upscaling is not rate-limited.
Aggressive: An alias for Default; upscaling is not rate-limited.
It is not read by the KubeRay operator but by the Ray autoscaler. | | Enum: [Default Aggressive Conservative]
| +| `env` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Optional list of environment variables to set in the autoscaler container. | | | +| `envFrom` _[EnvFromSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envfromsource-v1-core) array_ | Optional list of sources to populate environment variables in the autoscaler container. | | | +| `volumeMounts` _[VolumeMount](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#volumemount-v1-core) array_ | Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container. | | | @@ -289,16 +413,18 @@ _Appears in:_ HeadGroupSpec are the spec for the head pod + + _Appears in:_ - [RayClusterSpec](#rayclusterspec) -| Field | Description | -| --- | --- | -| `serviceType` _[ServiceType](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#servicetype-v1-core)_ | ServiceType is Kubernetes service type of the head service. it will be used by the workers to connect to the head pod | -| `headService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | HeadService is the Kubernetes service of the head pod. | -| `enableIngress` _boolean_ | EnableIngress indicates whether operator should create ingress object for head service or not. | -| `rayStartParams` _object (keys:string, values:string)_ | RayStartParams are the params of the start command: node-manager-port, object-store-memory, ... | -| `template` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | Template is the exact pod template used in K8s depoyments, statefulsets, etc. 
| +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `serviceType` _[ServiceType](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#servicetype-v1-core)_ | ServiceType is Kubernetes service type of the head service. It will be used by the workers to connect to the head pod | | | +| `headService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | HeadService is the Kubernetes service of the head pod. | | | +| `enableIngress` _boolean_ | EnableIngress indicates whether operator should create ingress object for head service or not. | | | +| `rayStartParams` _object (keys:string, values:string)_ | RayStartParams are the params of the start command: node-manager-port, object-store-memory, ... | | | +| `template` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | Template is the exact pod template used in K8s deployments, statefulsets, etc. | | | #### RayCluster @@ -309,12 +435,14 @@ RayCluster is the Schema for the RayClusters API -| Field | Description | -| --- | --- | -| `apiVersion` _string_ | `ray.io/v1alpha1` -| `kind` _string_ | `RayCluster` -| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | -| `spec` _[RayClusterSpec](#rayclusterspec)_ | Specification of the desired behavior of the RayCluster. | + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `ray.io/v1alpha1` | | | +| `kind` _string_ | `RayCluster` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[RayClusterSpec](#rayclusterspec)_ | Specification of the desired behavior of the RayCluster.
| | | #### RayClusterSpec @@ -323,20 +451,22 @@ RayCluster is the Schema for the RayClusters API RayClusterSpec defines the desired state of RayCluster + + _Appears in:_ - [RayCluster](#raycluster) - [RayJobSpec](#rayjobspec) - [RayServiceSpec](#rayservicespec) -| Field | Description | -| --- | --- | -| `headGroupSpec` _[HeadGroupSpec](#headgroupspec)_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster Important: Run "make" to regenerate code after modifying this file HeadGroupSpecs are the spec for the head pod | -| `workerGroupSpecs` _[WorkerGroupSpec](#workergroupspec) array_ | WorkerGroupSpecs are the specs for the worker pods | -| `rayVersion` _string_ | RayVersion is used to determine the command for the Kubernetes Job managed by RayJob | -| `enableInTreeAutoscaling` _boolean_ | EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs | -| `autoscalerOptions` _[AutoscalerOptions](#autoscaleroptions)_ | AutoscalerOptions specifies optional configuration for the Ray autoscaler. | -| `headServiceAnnotations` _object (keys:string, values:string)_ | | -| `suspend` _boolean_ | Suspend indicates whether a RayCluster should be suspended. A suspended RayCluster will have head pods and worker pods deleted. | +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `enableInTreeAutoscaling` _boolean_ | EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs | | | +| `autoscalerOptions` _[AutoscalerOptions](#autoscaleroptions)_ | AutoscalerOptions specifies optional configuration for the Ray autoscaler. | | | +| `suspend` _boolean_ | Suspend indicates whether a RayCluster should be suspended.
A suspended RayCluster will have head pods and worker pods deleted. | | | +| `headServiceAnnotations` _object (keys:string, values:string)_ | | | | +| `headGroupSpec` _[HeadGroupSpec](#headgroupspec)_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
Important: Run "make" to regenerate code after modifying this file
HeadGroupSpecs are the spec for the head pod | | | +| `rayVersion` _string_ | RayVersion is used to determine the command for the Kubernetes Job managed by RayJob | | | +| `workerGroupSpecs` _[WorkerGroupSpec](#workergroupspec) array_ | WorkerGroupSpecs are the specs for the worker pods | | | #### RayJob @@ -347,12 +477,14 @@ RayJob is the Schema for the rayjobs API -| Field | Description | -| --- | --- | -| `apiVersion` _string_ | `ray.io/v1alpha1` -| `kind` _string_ | `RayJob` -| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | -| `spec` _[RayJobSpec](#rayjobspec)_ | | + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `ray.io/v1alpha1` | | | +| `kind` _string_ | `RayJob` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[RayJobSpec](#rayjobspec)_ | | | | #### RayJobSpec @@ -361,24 +493,26 @@ RayJob is the Schema for the rayjobs API RayJobSpec defines the desired state of RayJob + + _Appears in:_ - [RayJob](#rayjob) -| Field | Description | -| --- | --- | -| `entrypoint` _string_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster Important: Run "make" to regenerate code after modifying this file | -| `metadata` _object (keys:string, values:string)_ | Refer to Kubernetes API documentation for fields of `metadata`. | -| `runtimeEnvYAML` _string_ | RuntimeEnvYAML represents the runtime environment configuration provided as a multi-line YAML string. | -| `jobId` _string_ | If jobId is not set, a new jobId will be auto-generated. | -| `shutdownAfterJobFinishes` _boolean_ | ShutdownAfterJobFinishes will determine whether to delete the ray cluster once rayJob succeed or failed. 
| -| `ttlSecondsAfterFinished` _integer_ | TTLSecondsAfterFinished is the TTL to clean up RayCluster. It's only working when ShutdownAfterJobFinishes set to true. | -| `rayClusterSpec` _[RayClusterSpec](#rayclusterspec)_ | RayClusterSpec is the cluster template to run the job | -| `clusterSelector` _object (keys:string, values:string)_ | clusterSelector is used to select running rayclusters by labels | -| `suspend` _boolean_ | suspend specifies whether the RayJob controller should create a RayCluster instance If a job is applied with the suspend field set to true, the RayCluster will not be created and will wait for the transition to false. If the RayCluster is already created, it will be deleted. In case of transition to false a new RayCluster will be created. | -| `submitterPodTemplate` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | SubmitterPodTemplate is the template for the pod that will run `ray job submit`. | -| `entrypointNumCpus` _float_ | EntrypointNumCpus specifies the number of cpus to reserve for the entrypoint command. | -| `entrypointNumGpus` _float_ | EntrypointNumGpus specifies the number of gpus to reserve for the entrypoint command. | -| `entrypointResources` _string_ | EntrypointResources specifies the custom resources and quantities to reserve for the entrypoint command. | +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `submitterPodTemplate` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | SubmitterPodTemplate is the template for the pod that will run `ray job submit`. | | | +| `metadata` _object (keys:string, values:string)_ | Refer to Kubernetes API documentation for fields of `metadata`. 
| | | +| `rayClusterSpec` _[RayClusterSpec](#rayclusterspec)_ | RayClusterSpec is the cluster template to run the job | | | +| `clusterSelector` _object (keys:string, values:string)_ | ClusterSelector is used to select running rayclusters by labels | | | +| `entrypoint` _string_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
Important: Run "make" to regenerate code after modifying this file | | | +| `runtimeEnvYAML` _string_ | RuntimeEnvYAML represents the runtime environment configuration
provided as a multi-line YAML string. | | | +| `jobId` _string_ | If jobId is not set, a new jobId will be auto-generated. | | | +| `entrypointResources` _string_ | EntrypointResources specifies the custom resources and quantities to reserve for the
entrypoint command. | | | +| `ttlSecondsAfterFinished` _integer_ | TTLSecondsAfterFinished is the TTL to clean up RayCluster.
It only takes effect when ShutdownAfterJobFinishes is set to true. | 0 | | +| `entrypointNumCpus` _float_ | EntrypointNumCpus specifies the number of cpus to reserve for the entrypoint command. | | | +| `entrypointNumGpus` _float_ | EntrypointNumGpus specifies the number of gpus to reserve for the entrypoint command. | | | +| `shutdownAfterJobFinishes` _boolean_ | ShutdownAfterJobFinishes determines whether to delete the Ray cluster once the RayJob succeeds or fails. | | | +| `suspend` _boolean_ | Suspend specifies whether the RayJob controller should create a RayCluster instance.
If a job is applied with the suspend field set to true,
the RayCluster will not be created and will wait for the transition to false.
If the RayCluster is already created, it will be deleted.
In case of transition to false a new RayCluster will be created. | | | @@ -391,12 +525,14 @@ RayService is the Schema for the rayservices API -| Field | Description | -| --- | --- | -| `apiVersion` _string_ | `ray.io/v1alpha1` -| `kind` _string_ | `RayService` -| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | -| `spec` _[RayServiceSpec](#rayservicespec)_ | | + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `ray.io/v1alpha1` | | | +| `kind` _string_ | `RayService` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[RayServiceSpec](#rayservicespec)_ | | | | #### RayServiceSpec @@ -405,16 +541,18 @@ RayService is the Schema for the rayservices API RayServiceSpec defines the desired state of RayService + + _Appears in:_ - [RayService](#rayservice) -| Field | Description | -| --- | --- | -| `serveConfigV2` _string_ | Important: Run "make" to regenerate code after modifying this file Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. | -| `rayClusterConfig` _[RayClusterSpec](#rayclusterspec)_ | | -| `serviceUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | -| `deploymentUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | -| `serveService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. 
| +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `serveService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. | | | +| `serviceUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | | | +| `deploymentUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | | | +| `serveConfigV2` _string_ | Important: Run "make" to regenerate code after modifying this file
Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. | | | +| `rayClusterConfig` _[RayClusterSpec](#rayclusterspec)_ | | | | @@ -425,12 +563,14 @@ _Appears in:_ ScaleStrategy to remove workers + + _Appears in:_ - [WorkerGroupSpec](#workergroupspec) -| Field | Description | -| --- | --- | -| `workersToDelete` _string array_ | WorkersToDelete workers to be deleted | +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `workersToDelete` _string array_ | WorkersToDelete workers to be deleted | | | #### UpscalingMode @@ -439,6 +579,9 @@ _Underlying type:_ _string_ +_Validation:_ +- Enum: [Default Aggressive Conservative] + _Appears in:_ - [AutoscalerOptions](#autoscaleroptions) @@ -450,17 +593,19 @@ _Appears in:_ WorkerGroupSpec are the specs for the worker pods + + _Appears in:_ - [RayClusterSpec](#rayclusterspec) -| Field | Description | -| --- | --- | -| `groupName` _string_ | we can have multiple worker groups, we distinguish them by name | -| `replicas` _integer_ | Replicas is the number of desired Pods for this worker group. See https://github.com/ray-project/kuberay/pull/1443 for more details about the reason for making this field optional. | -| `minReplicas` _integer_ | MinReplicas denotes the minimum number of desired Pods for this worker group. | -| `maxReplicas` _integer_ | MaxReplicas denotes the maximum number of desired Pods for this worker group, and the default value is maxInt32. | -| `rayStartParams` _object (keys:string, values:string)_ | RayStartParams are the params of the start command: address, object-store-memory, ... 
| -| `template` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | Template is a pod template for the worker | -| `scaleStrategy` _[ScaleStrategy](#scalestrategy)_ | ScaleStrategy defines which pods to remove | +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `groupName` _string_ | we can have multiple worker groups, we distinguish them by name | | | +| `replicas` _integer_ | Replicas is the number of desired Pods for this worker group. See https://github.com/ray-project/kuberay/pull/1443 for more details about the reason for making this field optional. | 0 | | +| `minReplicas` _integer_ | MinReplicas denotes the minimum number of desired Pods for this worker group. | 0 | | +| `maxReplicas` _integer_ | MaxReplicas denotes the maximum number of desired Pods for this worker group, and the default value is maxInt32. | 2147483647 | | +| `rayStartParams` _object (keys:string, values:string)_ | RayStartParams are the params of the start command: address, object-store-memory, ... | | | +| `template` _[PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#podtemplatespec-v1-core)_ | Template is a pod template for the worker | | | +| `scaleStrategy` _[ScaleStrategy](#scalestrategy)_ | ScaleStrategy defines which pods to remove | | | diff --git a/docs/release/changelog.md b/docs/release/changelog.md index 731498b7f42..edd12dfeca4 100644 --- a/docs/release/changelog.md +++ b/docs/release/changelog.md @@ -16,7 +16,7 @@ ``` git log v0.3.0..v0.4.0 --oneline ``` - + You may need to run the following command first: ``` @@ -29,25 +29,25 @@ Run the script to generate changelogs. 
``` from github import Github import re - - + + class ChangelogGenerator: def __init__(self, github_repo): # Replace with your Github Token self._github = Github('') self._github_repo = self._github.get_repo(github_repo) - + def generate(self, pr_id): pr = self._github_repo.get_pull(pr_id) - + return "{title} ([#{pr_id}]({pr_link}), @{user})".format( title=pr.title, pr_id=pr_id, pr_link=pr.html_url, user=pr.user.login ) - - + + # generated by `git log .. --oneline` payload = ''' 7374e2c [RayService] Skip update events without change (#811) (#825) diff --git a/docs/release/helm-chart.md b/docs/release/helm-chart.md index 8dfdd07e89e..0fd8ed79fff 100644 --- a/docs/release/helm-chart.md +++ b/docs/release/helm-chart.md @@ -28,7 +28,7 @@ You can validate the charts as follows: ```sh helm repo add kuberay https://ray-project.github.io/kuberay-helm/ helm repo update - + # List all charts helm search repo kuberay diff --git a/experimental/Dockerfile b/experimental/Dockerfile index fd51de3b3ef..22eab9c5265 100644 --- a/experimental/Dockerfile +++ b/experimental/Dockerfile @@ -1,5 +1,5 @@ # Build security proxy -FROM registry.access.redhat.com/ubi9/go-toolset:1.20.10 as builder +FROM golang:1.22.4-bullseye as builder WORKDIR /workspace # Copy the Go Modules manifests diff --git a/experimental/Makefile b/experimental/Makefile index 2f4bd5ba7b2..16337c26d75 100644 --- a/experimental/Makefile +++ b/experimental/Makefile @@ -57,7 +57,7 @@ fumpt: gofumpt ## Run gofmtumpt against code. .PHONY: lint lint: golangci-lint fmt vet fumpt ## Run the linter. - $(GOLANGCI_LINT) run --timeout=3m + $(GOLANGCI_LINT) run --timeout=3m --no-config build: fmt vet fumpt lint ## Build api server binary. go build -o ${REPO_ROOT_BIN}/kuberay-apiserver-proxy cmd/main.go @@ -91,7 +91,7 @@ GOBINDATA_VERSION ?= v4.0.2 .PHONY: gofumpt gofumpt: $(GOFUMPT) ## Download gofumpt locally if necessary. 
-$(GOFUMPT): $(REPO_ROOT_BIN) +$(GOFUMPT): $(REPO_ROOT_BIN) -s $(GOFUMPT) || GOBIN=$(REPO_ROOT_BIN) go install mvdan.cc/gofumpt@$(GOFUMPT_VERSION) .PHONY: golangci-lint @@ -111,4 +111,4 @@ dev-tools: golangci-lint gofumpt go-bindata ## Install all development tools clean-dev-tools: ## Remove all development tools rm -f $(REPO_ROOT_BIN)/golangci-lint rm -f $(REPO_ROOT_BIN)/gofumpt - rm -f $(REPO_ROOT_BIN)/go-bindata \ No newline at end of file + rm -f $(REPO_ROOT_BIN)/go-bindata diff --git a/experimental/cmd/main.go b/experimental/cmd/main.go index 852fa5fadac..45910f789c8 100644 --- a/experimental/cmd/main.go +++ b/experimental/cmd/main.go @@ -59,7 +59,7 @@ func main() { go func() { remote_url := "http://localhost:" + http_remote_port // Client connection - cc, err := grpc.Dial(remote_url, grpc.WithTransportCredentials(insecure.NewCredentials())) + cc, err := grpc.NewClient(remote_url, grpc.WithTransportCredentials(insecure.NewCredentials())) if err != nil { klog.Fatal("cannot dial server: ", err) } @@ -112,6 +112,6 @@ func main() { // Run HTTP proxy err = http.ListenAndServe(":"+http_local_port, nil) if err != nil { - klog.Fatal("HTTP server died unexpectidly, error - ", err) + klog.Fatal("HTTP server died unexpectedly, error - ", err) } } diff --git a/experimental/go.mod b/experimental/go.mod index f37a409589f..66b4f36bdcf 100644 --- a/experimental/go.mod +++ b/experimental/go.mod @@ -3,16 +3,15 @@ module github.com/ray-project/kuberay/security go 1.20 require ( - google.golang.org/grpc v1.59.0 - google.golang.org/protobuf v1.33.0 - k8s.io/klog/v2 v2.110.1 + google.golang.org/grpc v1.64.1 + google.golang.org/protobuf v1.34.2 + k8s.io/klog/v2 v2.130.1 ) require ( - github.com/go-logr/logr v1.3.0 // indirect - github.com/golang/protobuf v1.5.3 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/text v0.14.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect + 
github.com/go-logr/logr v1.4.2 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240624140628-dc46fd24d27d // indirect ) diff --git a/experimental/go.sum b/experimental/go.sum index 7dcc743fd28..005ce60d64a 100644 --- a/experimental/go.sum +++ b/experimental/go.sum @@ -1,24 +1,17 @@ -github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= -github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod 
h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= -k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240624140628-dc46fd24d27d h1:k3zyW3BYYR30e8v3x0bTDdE9vpYFjZHK+HcyqkrppWk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240624140628-dc46fd24d27d/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= +google.golang.org/grpc v1.64.1 h1:LKtvyfbX3UGVPFcGqJ9ItpVWW6oN/2XqTxfAnwRRXiA= +google.golang.org/grpc v1.64.1/go.mod h1:hiQF4LFZelK2WKaP6W0L92zGHtiQdZxk8CrSdvyjeP0= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod 
h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= diff --git a/experimental/pkg/grpcproxy/handler.go b/experimental/pkg/grpcproxy/handler.go index 7a9e38b3017..df5f1a86e3d 100644 --- a/experimental/pkg/grpcproxy/handler.go +++ b/experimental/pkg/grpcproxy/handler.go @@ -19,7 +19,7 @@ var clientStreamDescForProxying = &grpc.StreamDesc{ } // RegisterService sets up a proxy handler for a particular gRPC service and method. -// The behaviour is the same as if you were registering a handler method, e.g. from a generated pb.go file. +// The behavior is the same as if you were registering a handler method, e.g. from a generated pb.go file. func RegisterService(server *grpc.Server, director StreamDirector, serviceName string, methodNames ...string) *handler { streamer := &handler{director: director, securityheader: nil} fakeDesc := &grpc.ServiceDesc{ @@ -76,11 +76,11 @@ func (s *handler) handler(srv interface{}, serverStream grpc.ServerStream) error if v, exists := header[h_name]; exists { // Authentication header exists if v[0] != strings.ToLower(header_value) { - return status.Error(codes.Unauthenticated, "Request unauthorised") + return status.Error(codes.Unauthenticated, "Request unauthorized") } } else { // Authentication header does not exist - return status.Error(codes.Unauthenticated, "Request unauthorised") + return status.Error(codes.Unauthenticated, "Request unauthorized") } } } diff --git a/experimental/pkg/httpproxy/support.go b/experimental/pkg/httpproxy/support.go index 9e0b4bce387..8594353966b 100644 --- a/experimental/pkg/httpproxy/support.go +++ b/experimental/pkg/httpproxy/support.go @@ -16,18 +16,18 @@ type authorization struct { upstream *url.URL } -// Create Unauthorised response +// Create Unauthorized response func WriteUnauthorisedResponse(w http.ResponseWriter) { - w.WriteHeader(401) - _, err := w.Write([]byte("Unauthorised\n")) + w.WriteHeader(http.StatusUnauthorized) + _, err := w.Write([]byte("Unauthorized\n")) if err != nil { - klog.Info("failed 
writing unauthorised response ", err) + klog.Info("failed writing unauthorized response ", err) } } // Create bad request response func WriteBadRequestResponse(w http.ResponseWriter) { - w.WriteHeader(400) + w.WriteHeader(http.StatusBadRequest) _, err := w.Write([]byte("Bad Request\n")) if err != nil { klog.Info("failed writing bad request response ", err) @@ -36,7 +36,7 @@ func WriteBadRequestResponse(w http.ResponseWriter) { // Create internal error response func WriteInternalErrorResponse(w http.ResponseWriter) { - w.WriteHeader(500) + w.WriteHeader(http.StatusInternalServerError) _, err := w.Write([]byte("Internal Server Error\n")) if err != nil { klog.Info("failed writing internal error response ", err) diff --git a/helm-chart/kuberay-apiserver/templates/deployment.yaml b/helm-chart/kuberay-apiserver/templates/deployment.yaml index 887caa907fc..8a7d9342001 100644 --- a/helm-chart/kuberay-apiserver/templates/deployment.yaml +++ b/helm-chart/kuberay-apiserver/templates/deployment.yaml @@ -87,4 +87,4 @@ spec: {{- with .Values.tolerations }} tolerations: {{- toYaml . | nindent 8 }} - {{- end }} + {{- end }} diff --git a/helm-chart/kuberay-apiserver/templates/service.yaml b/helm-chart/kuberay-apiserver/templates/service.yaml index c7af716c0d2..51e383a0cda 100644 --- a/helm-chart/kuberay-apiserver/templates/service.yaml +++ b/helm-chart/kuberay-apiserver/templates/service.yaml @@ -30,4 +30,3 @@ spec: nodePort: {{ $port.nodePort }} {{ end }} {{ end }} - diff --git a/helm-chart/kuberay-operator/README.md b/helm-chart/kuberay-operator/README.md index e4f58d96a65..bbdc804bfec 100644 --- a/helm-chart/kuberay-operator/README.md +++ b/helm-chart/kuberay-operator/README.md @@ -44,7 +44,7 @@ helm version # Step 2: Install KubeRay operator only. 
(for developer) helm install kuberay-operator kuberay/kuberay-operator --version 1.1.0 --skip-crds - ``` + ``` ## List the chart diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml index 9f037abe88f..bd062f63a56 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.13.0 + controller-gen.kubebuilder.io/version: v0.15.0 name: rayclusters.ray.io spec: group: ray.io @@ -78,6 +78,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -116,6 +117,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -134,6 +136,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -144,6 +147,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -193,16 +197,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -267,6 +282,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -279,6 +296,135 @@ spec: type: object enableInTreeAutoscaling: type: boolean + gcsFaultToleranceOptions: + properties: + externalStorageNamespace: + type: string + redisAddress: + type: string + redisPassword: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: 
string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + redisUsername: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + required: + - redisAddress + type: object headGroupSpec: properties: enableIngress: @@ -323,6 +469,7 @@ spec: items: type: 
string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -347,6 +494,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -394,6 +542,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -447,6 +597,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -468,6 +620,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -523,11 +676,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -539,11 +694,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -554,6 +711,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -570,11 +728,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -586,14 +746,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -619,17 +782,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object 
x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -643,11 +818,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -658,6 +835,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -671,6 +849,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -687,17 +866,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -711,11 +902,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -726,12 +919,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -753,17 +948,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: 
object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -777,11 +984,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -792,6 +1001,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -805,6 +1015,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -821,17 +1032,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -845,11 +1068,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -860,12 +1085,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -877,10 +1104,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -895,6 +1124,7 @@ spec: 
key: type: string name: + default: "" type: string optional: type: boolean @@ -933,6 +1163,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -945,12 +1176,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -961,6 +1196,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -968,6 +1204,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -982,6 +1219,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -999,6 +1237,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1011,6 +1250,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1032,6 +1279,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1049,6 +1297,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1061,6 +1310,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1083,6 +1340,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1113,6 +1371,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1187,6 +1446,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1217,6 +1477,7 @@ spec: - value type: object 
type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1307,16 +1568,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -1372,6 +1644,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1402,6 +1675,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1464,6 +1738,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -1475,6 +1752,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -1484,18 +1763,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -1505,10 +1791,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -1521,10 +1809,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -1539,6 +1829,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1577,6 +1868,7 @@ spec: key: type: 
string name: + default: "" type: string optional: type: boolean @@ -1589,12 +1881,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -1605,6 +1901,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -1612,6 +1909,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -1626,6 +1924,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1643,6 +1942,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1655,6 +1955,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1676,6 +1984,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1693,6 +2002,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1705,6 +2015,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1727,6 +2045,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1757,6 +2076,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1831,6 +2151,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1861,6 +2182,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1951,16 +2273,27 @@ spec: 
properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2016,6 +2349,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2046,6 +2380,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2110,6 +2445,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2121,6 +2459,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2130,12 +2470,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -2143,10 +2489,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -2161,10 +2513,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -2172,10 +2528,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -2190,6 +2548,7 @@ spec: key: type: string name: + 
default: "" type: string optional: type: boolean @@ -2228,6 +2587,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2240,12 +2600,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -2256,6 +2620,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -2263,6 +2628,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -2277,6 +2643,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2294,6 +2661,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2306,6 +2674,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2327,6 +2703,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2344,6 +2721,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2356,6 +2734,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2378,6 +2764,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2408,6 +2795,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2482,6 +2870,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2512,6 +2901,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -2602,16 +2992,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2667,6 +3068,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2697,6 +3099,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2759,6 +3162,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2770,6 +3176,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2779,12 +3187,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -2823,6 +3237,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -2862,6 +3277,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -2900,6 +3324,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -2912,6 +3337,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: 
@@ -2953,6 +3379,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -2969,11 +3396,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3062,6 +3491,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -3071,6 +3501,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3088,6 +3519,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3116,7 +3548,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3131,6 +3565,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3186,6 +3621,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -3227,6 +3663,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -3256,18 +3693,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -3298,11 +3723,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3311,6 +3738,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -3333,10 +3762,12 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -3353,6 +3784,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3433,11 +3865,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3499,6 +3933,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -3516,7 +3989,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3562,6 +4037,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -3580,7 +4056,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3600,6 +4078,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -3631,6 +4110,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -3638,6 +4118,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: 
atomic @@ -3660,6 +4141,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3698,6 +4180,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -3712,6 +4195,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3737,6 +4221,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -3749,6 +4236,14 @@ spec: additionalProperties: type: string type: object + managedBy: + type: string + x-kubernetes-validations: + - message: the managedBy field is immutable + rule: self == oldSelf + - message: the managedBy field value must be either 'ray.io/kuberay-operator' + or 'kueue.x-k8s.io/multikueue' + rule: self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue'] rayVersion: type: string suspend: @@ -3758,6 +4253,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 @@ -3785,6 +4283,8 @@ spec: type: string type: array type: object + suspend: + type: boolean template: properties: metadata: @@ -3831,11 +4331,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3847,11 +4349,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -3862,6 +4366,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -3878,11 +4383,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - 
operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3894,14 +4401,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -3927,17 +4437,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -3951,11 +4473,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3966,6 +4490,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -3979,6 +4504,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -3995,17 +4521,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4019,11 +4557,13 @@ spec: items: type: 
string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4034,12 +4574,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -4061,17 +4603,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4085,11 +4639,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4100,6 +4656,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -4113,6 +4670,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -4129,17 +4687,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4153,11 +4723,13 
@@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4168,12 +4740,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -4185,10 +4759,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4203,6 +4779,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4241,6 +4818,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4253,12 +4831,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4269,6 +4851,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4276,6 +4859,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4290,6 +4874,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4307,6 +4892,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4319,6 +4905,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4340,6 +4934,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4357,6 +4952,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -4369,6 +4965,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4391,6 +4995,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4421,6 +5026,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4495,6 +5101,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4525,6 +5132,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4615,16 +5223,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -4680,6 +5299,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4710,6 +5330,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4772,6 +5393,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -4783,6 +5407,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -4792,18 +5418,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + 
x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -4813,10 +5446,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -4829,10 +5464,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4847,6 +5484,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4885,6 +5523,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4897,12 +5536,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4913,6 +5556,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4920,6 +5564,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4934,6 +5579,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4951,6 +5597,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4963,6 +5610,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4984,6 +5639,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5001,6 +5657,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5011,7 +5668,15 @@ spec: scheme: type: 
string required: - - port + - port + type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds type: object tcpSocket: properties: @@ -5035,6 +5700,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5065,6 +5731,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5139,6 +5806,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5169,6 +5837,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5259,16 +5928,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5324,6 +6004,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5354,6 +6035,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5418,6 +6100,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -5429,6 +6114,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -5438,12 +6125,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -5451,10 +6144,16 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -5469,10 +6168,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -5480,10 +6183,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -5498,6 +6203,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5536,6 +6242,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5548,12 +6255,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -5564,6 +6275,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -5571,6 +6283,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -5585,6 +6298,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5602,6 +6316,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5614,6 +6329,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5635,6 +6358,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5652,6 +6376,7 @@ spec: - value type: object type: array 
+ x-kubernetes-list-type: atomic path: type: string port: @@ -5664,6 +6389,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5686,6 +6419,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5716,6 +6450,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5790,6 +6525,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5820,6 +6556,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5910,16 +6647,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5975,6 +6723,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -6005,6 +6754,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -6067,6 +6817,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -6078,6 +6831,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -6087,12 +6842,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + 
x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -6131,6 +6892,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -6170,6 +6932,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -6208,6 +6979,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -6220,6 +6992,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -6261,6 +7034,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -6277,11 +7051,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6370,6 +7146,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -6379,6 +7156,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6396,6 +7174,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6424,7 +7203,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6439,6 +7220,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6494,6 +7276,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -6535,6 +7318,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: 
properties: apiGroup: @@ -6564,18 +7348,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -6606,11 +7378,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6619,6 +7393,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -6641,10 +7417,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -6661,6 +7439,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6741,11 +7520,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6807,6 +7588,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -6824,7 +7644,9 @@ spec: - path type: object type: array + 
x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6870,6 +7692,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -6888,7 +7711,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6908,6 +7733,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -6939,6 +7765,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -6946,6 +7773,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6968,6 +7796,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7006,6 +7835,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -7020,6 +7850,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7045,6 +7876,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -7065,6 +7899,45 @@ spec: availableWorkerReplicas: format: int32 type: integer + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + maxLength: 32768 + type: string + observedGeneration: + format: int64 + minimum: 0 + type: integer + reason: + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + enum: + - "True" + - "False" + - Unknown + type: string + type: + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - 
type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map desiredCPU: anyOf: - type: integer @@ -7100,8 +7973,12 @@ spec: properties: podIP: type: string + podName: + type: string serviceIP: type: string + serviceName: + type: string type: object lastUpdateTime: format: date-time @@ -7116,10 +7993,18 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: type: string + stateTransitionTimes: + additionalProperties: + format: date-time + type: string + type: object type: object type: object served: true @@ -7188,6 +8073,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7226,6 +8112,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7244,6 +8131,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -7254,6 +8142,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -7303,16 +8192,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -7377,6 +8277,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -7433,6 +8335,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -7457,6 +8360,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -7504,6 +8408,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string 
type: object @@ -7557,6 +8463,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -7578,6 +8486,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -7633,11 +8542,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7649,11 +8560,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -7664,6 +8577,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -7680,11 +8594,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7696,14 +8612,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -7729,17 +8648,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7753,11 +8684,13 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7768,6 +8701,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -7781,6 +8715,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -7797,17 +8732,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7821,11 +8768,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7836,12 +8785,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -7863,17 +8814,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7887,11 +8850,13 @@ spec: items: type: 
string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7902,6 +8867,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -7915,6 +8881,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -7931,17 +8898,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7955,11 +8934,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7970,12 +8951,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -7987,10 +8970,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8005,6 +8990,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8043,6 +9029,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8055,12 +9042,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + 
x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8071,6 +9062,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8078,6 +9070,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8092,6 +9085,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8109,6 +9103,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8121,6 +9116,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8142,6 +9145,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8159,6 +9163,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8171,6 +9176,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8193,6 +9206,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8223,6 +9237,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8297,6 +9312,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8327,6 +9343,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8417,16 +9434,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: 
object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -8482,6 +9510,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8512,6 +9541,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8574,6 +9604,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -8585,6 +9618,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -8594,18 +9629,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -8615,10 +9657,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -8631,10 +9675,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8649,6 +9695,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8687,6 +9734,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8699,12 +9747,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: 
string optional: type: boolean @@ -8715,6 +9767,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8722,6 +9775,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8736,6 +9790,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8753,6 +9808,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8765,6 +9821,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8786,6 +9850,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8803,6 +9868,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8815,6 +9881,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8837,6 +9911,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8867,6 +9942,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8941,6 +10017,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8971,6 +10048,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9061,16 +10139,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: 
items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -9126,6 +10215,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9156,6 +10246,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9220,6 +10311,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -9231,6 +10325,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -9240,12 +10336,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -9253,10 +10355,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -9271,10 +10379,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -9282,10 +10394,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -9300,6 +10414,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -9338,6 +10453,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -9350,12 +10466,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + 
x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -9366,6 +10486,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -9373,6 +10494,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -9387,6 +10509,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9404,6 +10527,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9416,6 +10540,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9437,6 +10569,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9454,6 +10587,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9466,6 +10600,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9488,6 +10630,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9518,6 +10661,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9592,6 +10736,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9622,6 +10767,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9712,16 +10858,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type 
+ type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -9777,6 +10934,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9807,6 +10965,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9869,6 +11028,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -9880,6 +11042,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -9889,12 +11053,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -9933,6 +11103,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -9972,6 +11143,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -10010,6 +11190,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -10022,6 +11203,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -10063,6 +11245,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -10079,11 +11262,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: 
atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -10172,6 +11357,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -10181,6 +11367,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10198,6 +11385,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10226,7 +11414,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10241,6 +11431,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10296,6 +11487,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -10337,6 +11529,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -10366,18 +11559,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -10408,11 +11589,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -10421,6 +11604,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -10443,10 +11628,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -10463,6 +11650,7 @@ spec: 
secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10543,11 +11731,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10609,6 +11799,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -10626,7 +11855,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10672,6 +11903,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -10690,7 +11922,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10710,6 +11944,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -10741,6 +11976,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -10748,6 +11984,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10770,6 +12007,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10808,6 +12046,7 @@ spec: - path type: object 
type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -10822,6 +12061,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10847,6 +12087,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -10937,11 +12180,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -10953,11 +12198,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -10968,6 +12215,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -10984,11 +12232,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -11000,14 +12250,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -11033,17 +12286,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic 
namespaceSelector: properties: matchExpressions: @@ -11057,11 +12322,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11072,6 +12339,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11085,6 +12353,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11101,17 +12370,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11125,11 +12406,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11140,12 +12423,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -11167,17 +12452,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: 
array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11191,11 +12488,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11206,6 +12505,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11219,6 +12519,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11235,17 +12536,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11259,11 +12572,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11274,12 +12589,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -11291,10 +12608,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -11309,6 +12628,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11347,6 +12667,7 @@ spec: key: type: string name: + default: "" type: 
string optional: type: boolean @@ -11359,12 +12680,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -11375,6 +12700,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -11382,6 +12708,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -11396,6 +12723,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11413,6 +12741,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11425,6 +12754,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11446,6 +12783,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11463,6 +12801,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11475,6 +12814,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11497,6 +12844,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11527,6 +12875,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11601,6 +12950,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11631,6 +12981,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11721,16 +13072,27 @@ spec: properties: 
allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -11786,6 +13148,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11816,6 +13179,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11878,6 +13242,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -11889,6 +13256,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -11898,18 +13267,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -11919,10 +13295,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -11935,10 +13313,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -11953,6 +13333,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11991,6 +13372,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12003,12 +13385,16 @@ spec: - 
name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12019,6 +13405,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12026,6 +13413,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12040,6 +13428,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12057,6 +13446,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12069,6 +13459,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12090,6 +13488,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12107,6 +13506,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12119,6 +13519,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12141,6 +13549,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12171,6 +13580,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12245,6 +13655,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12275,6 +13686,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12365,16 +13777,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + 
properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -12430,6 +13853,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12460,6 +13884,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12524,6 +13949,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -12535,6 +13963,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -12544,12 +13974,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -12557,10 +13993,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -12575,10 +14017,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -12586,10 +14032,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -12604,6 +14052,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ 
-12642,6 +14091,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12654,12 +14104,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12670,6 +14124,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12677,6 +14132,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12691,6 +14147,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12708,6 +14165,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12720,6 +14178,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12741,6 +14207,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12758,6 +14225,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12770,6 +14238,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12792,6 +14268,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12822,6 +14299,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12896,6 +14374,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12926,6 +14405,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic 
path: type: string port: @@ -13016,16 +14496,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -13081,6 +14572,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13111,6 +14603,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13173,6 +14666,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -13184,6 +14680,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -13193,12 +14691,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -13237,6 +14741,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -13276,6 +14781,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -13314,6 +14828,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -13326,6 +14841,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -13367,6 
+14883,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -13383,11 +14900,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -13476,6 +14995,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -13485,6 +15005,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13502,6 +15023,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13530,7 +15052,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13545,6 +15069,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13600,6 +15125,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -13641,6 +15167,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -13670,18 +15197,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -13712,11 +15227,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -13725,6 +15242,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -13747,10 
+15266,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -13767,6 +15288,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13847,11 +15369,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13913,6 +15437,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -13930,7 +15493,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13976,6 +15541,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -13994,7 +15560,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -14014,6 +15582,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -14045,6 +15614,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -14052,6 +15622,7 @@ spec: secretRef: properties: name: + default: "" type: string 
type: object x-kubernetes-map-type: atomic @@ -14074,6 +15645,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14112,6 +15684,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -14126,6 +15699,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14151,6 +15725,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -14222,6 +15799,9 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml index 6fc5f98e204..580b50c7c5b 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.13.0 + controller-gen.kubebuilder.io/version: v0.15.0 name: rayjobs.ray.io spec: group: ray.io @@ -50,10 +50,20 @@ spec: activeDeadlineSeconds: format: int32 type: integer + backoffLimit: + default: 0 + format: int32 + type: integer clusterSelector: additionalProperties: type: string type: object + deletionPolicy: + type: string + x-kubernetes-validations: + - message: the deletionPolicy field value must be either 'DeleteCluster', + 'DeleteWorkers', 'DeleteSelf', or 'DeleteNone' + rule: self in ['DeleteCluster', 'DeleteWorkers', 'DeleteSelf', 'DeleteNone'] entrypoint: type: string entrypointNumCpus: @@ -64,6 +74,14 @@ spec: type: string jobId: type: string + managedBy: + type: string + x-kubernetes-validations: + - message: the managedBy field is immutable + rule: self == oldSelf + - message: the 
managedBy field value must be either 'ray.io/kuberay-operator' + or 'kueue.x-k8s.io/multikueue' + rule: self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue'] metadata: additionalProperties: type: string @@ -86,6 +104,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -124,6 +143,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -142,6 +162,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -152,6 +173,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -201,16 +223,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -275,6 +308,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -287,6 +322,135 @@ spec: type: object enableInTreeAutoscaling: type: boolean + gcsFaultToleranceOptions: + properties: + externalStorageNamespace: + type: string + redisAddress: + type: string + redisPassword: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: 
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + redisUsername: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + required: + - redisAddress + type: object headGroupSpec: properties: enableIngress: @@ -331,6 +495,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -355,6 +520,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -402,6 +568,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -455,6 +623,8 @@ spec: type: string 
ip: type: string + ipMode: + type: string ports: items: properties: @@ -476,6 +646,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -531,11 +702,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -547,11 +720,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -562,6 +737,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -578,11 +754,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -594,14 +772,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -627,17 +808,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -651,11 +844,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object 
type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -666,6 +861,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -679,6 +875,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -695,17 +892,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -719,11 +928,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -734,12 +945,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -761,17 +974,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -785,11 +1010,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: 
object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -800,6 +1027,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -813,6 +1041,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -829,17 +1058,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -853,11 +1094,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -868,12 +1111,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -885,10 +1130,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -903,6 +1150,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -941,6 +1189,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -953,12 +1202,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string 
optional: type: boolean @@ -969,6 +1222,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -976,6 +1230,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -990,6 +1245,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1007,6 +1263,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1019,6 +1276,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1040,6 +1305,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1057,6 +1323,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1069,6 +1336,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1091,6 +1366,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1121,6 +1397,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1195,6 +1472,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1225,6 +1503,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1315,16 +1594,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: 
string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -1380,6 +1670,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1410,6 +1701,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1472,6 +1764,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -1483,6 +1778,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -1492,18 +1789,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -1513,10 +1817,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -1529,10 +1835,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -1547,6 +1855,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1585,6 +1894,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1597,12 +1907,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -1613,6 +1927,7 @@ spec: secretRef: properties: name: + default: "" type: string 
optional: type: boolean @@ -1620,6 +1935,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -1634,6 +1950,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1651,6 +1968,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1663,6 +1981,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1684,6 +2010,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1701,6 +2028,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1713,6 +2041,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1735,6 +2071,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1765,6 +2102,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1839,6 +2177,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1869,6 +2208,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1959,16 +2299,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2024,6 
+2375,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2054,6 +2406,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2118,6 +2471,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2129,6 +2485,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2138,12 +2496,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -2151,10 +2515,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -2169,10 +2539,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -2180,10 +2554,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -2198,6 +2574,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2236,6 +2613,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2248,12 +2626,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ 
-2264,6 +2646,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -2271,6 +2654,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -2285,6 +2669,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2302,6 +2687,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2314,6 +2700,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2335,6 +2729,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2352,6 +2747,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2364,6 +2760,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2386,6 +2790,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2416,6 +2821,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2490,6 +2896,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2520,6 +2927,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2610,16 +3018,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + 
x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2675,6 +3094,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2705,6 +3125,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2767,6 +3188,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2778,6 +3202,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2787,12 +3213,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -2831,6 +3263,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -2870,6 +3303,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -2908,6 +3350,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -2920,6 +3363,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -2961,6 +3405,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -2977,11 +3422,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3070,6 +3517,7 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -3079,6 +3527,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3096,6 +3545,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3124,7 +3574,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3139,6 +3591,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3194,6 +3647,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -3235,6 +3689,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -3264,18 +3719,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -3306,11 +3749,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3319,6 +3764,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -3341,10 +3788,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -3361,6 +3810,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3441,11 +3891,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic 
readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3507,6 +3959,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -3524,7 +4015,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3570,6 +4063,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -3588,7 +4082,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3608,6 +4104,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -3639,6 +4136,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -3646,6 +4144,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3668,6 +4167,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3706,6 +4206,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -3720,6 +4221,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ 
-3745,6 +4247,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -3757,6 +4262,14 @@ spec: additionalProperties: type: string type: object + managedBy: + type: string + x-kubernetes-validations: + - message: the managedBy field is immutable + rule: self == oldSelf + - message: the managedBy field value must be either 'ray.io/kuberay-operator' + or 'kueue.x-k8s.io/multikueue' + rule: self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue'] rayVersion: type: string suspend: @@ -3766,6 +4279,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 @@ -3793,6 +4309,8 @@ spec: type: string type: array type: object + suspend: + type: boolean template: properties: metadata: @@ -3839,11 +4357,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3855,11 +4375,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -3870,6 +4392,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -3886,11 +4409,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3902,14 +4427,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - 
nodeSelectorTerms type: object @@ -3935,17 +4463,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -3959,11 +4499,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3974,6 +4516,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -3987,6 +4530,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -4003,17 +4547,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4027,11 +4583,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4042,12 +4600,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + 
x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -4069,17 +4629,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4093,11 +4665,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4108,6 +4682,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -4121,6 +4696,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -4137,17 +4713,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4161,11 +4749,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4176,12 +4766,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: 
object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -4193,10 +4785,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4211,6 +4805,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4249,6 +4844,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4261,12 +4857,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4277,6 +4877,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4284,6 +4885,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4298,6 +4900,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4315,6 +4918,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4327,6 +4931,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4348,6 +4960,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4365,6 +4978,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4377,6 +4991,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4399,6 +5021,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object 
failureThreshold: format: int32 @@ -4429,6 +5052,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4503,6 +5127,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4533,6 +5158,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4623,16 +5249,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -4688,6 +5325,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4718,6 +5356,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4780,6 +5419,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -4791,6 +5433,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -4800,18 +5444,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -4821,10 +5472,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string 
@@ -4837,10 +5490,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4855,6 +5510,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4893,6 +5549,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4905,12 +5562,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4921,6 +5582,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4928,6 +5590,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4942,6 +5605,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4959,6 +5623,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4971,6 +5636,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4992,6 +5665,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5009,6 +5683,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5021,6 +5696,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5043,6 +5726,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5073,6 +5757,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -5147,6 +5832,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5177,6 +5863,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5267,16 +5954,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5332,6 +6030,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5362,6 +6061,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5426,6 +6126,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -5437,6 +6140,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -5446,12 +6151,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -5459,10 +6170,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -5477,10 +6194,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + 
x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -5488,10 +6209,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -5506,6 +6229,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5544,6 +6268,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5556,12 +6281,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -5572,6 +6301,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -5579,6 +6309,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -5593,6 +6324,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5610,6 +6342,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5622,6 +6355,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5643,6 +6384,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5660,6 +6402,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5672,6 +6415,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5694,6 +6445,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object 
failureThreshold: format: int32 @@ -5724,6 +6476,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5798,6 +6551,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5828,6 +6582,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5918,16 +6673,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5983,6 +6749,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -6013,6 +6780,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -6075,6 +6843,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -6086,6 +6857,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -6095,12 +6868,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -6139,6 +6918,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -6178,6 +6958,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: 
string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -6216,6 +7005,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -6228,6 +7018,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -6269,6 +7060,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -6285,11 +7077,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6378,6 +7172,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -6387,6 +7182,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6404,6 +7200,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6432,7 +7229,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6447,6 +7246,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6502,6 +7302,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -6543,6 +7344,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -6572,18 +7374,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -6614,11 +7404,13 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6627,6 +7419,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -6649,10 +7443,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -6669,6 +7465,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6749,11 +7546,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6815,6 +7614,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -6832,7 +7670,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6878,6 +7718,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -6896,7 +7737,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6916,6 
+7759,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -6947,6 +7791,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -6954,6 +7799,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6976,6 +7822,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7014,6 +7861,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -7028,6 +7876,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7053,6 +7902,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -7075,6 +7927,12 @@ spec: submissionMode: default: K8sJobMode type: string + submitterConfig: + properties: + backoffLimit: + format: int32 + type: integer + type: object submitterPodTemplate: properties: metadata: @@ -7121,11 +7979,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7137,11 +7997,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -7152,6 +8014,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -7168,11 +8031,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7184,14 +8049,17 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -7217,17 +8085,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7241,11 +8121,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7256,6 +8138,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -7269,6 +8152,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -7285,17 +8169,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7309,11 +8205,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + 
x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7324,12 +8222,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -7351,17 +8251,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7375,11 +8287,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7390,6 +8304,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -7403,6 +8318,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -7419,17 +8335,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7443,11 +8371,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: 
object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7458,12 +8388,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -7475,10 +8407,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -7493,6 +8427,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7531,6 +8466,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7543,12 +8479,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -7559,6 +8499,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -7566,6 +8507,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -7580,6 +8522,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -7597,6 +8540,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -7609,6 +8553,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -7630,6 +8582,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -7647,6 +8600,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -7659,6 +8613,14 @@ spec: required: - port type: 
object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -7681,6 +8643,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -7711,6 +8674,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -7785,6 +8749,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -7815,6 +8780,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -7905,16 +8871,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -7970,6 +8947,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8000,6 +8978,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8062,6 +9041,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -8073,6 +9055,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -8082,18 +9066,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: 
atomic options: items: properties: @@ -8103,10 +9094,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -8119,10 +9112,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8137,6 +9132,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8175,6 +9171,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8187,12 +9184,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8203,6 +9204,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8210,6 +9212,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8224,6 +9227,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8241,6 +9245,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8253,6 +9258,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8274,6 +9287,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8291,6 +9305,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8303,6 +9318,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object 
tcpSocket: properties: host: @@ -8325,6 +9348,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8355,6 +9379,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8429,6 +9454,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8459,6 +9485,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8549,16 +9576,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -8614,6 +9652,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8644,6 +9683,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8708,6 +9748,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -8719,6 +9762,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -8728,12 +9773,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -8741,10 +9792,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + 
x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -8759,10 +9816,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -8770,10 +9831,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8788,6 +9851,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8826,6 +9890,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8838,12 +9903,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8854,6 +9923,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8861,6 +9931,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8875,6 +9946,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8892,6 +9964,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8904,6 +9977,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8925,6 +10006,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8942,6 +10024,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8954,6 +10037,14 @@ spec: required: - port type: object + sleep: 
+ properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8976,6 +10067,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9006,6 +10098,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9080,6 +10173,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9110,6 +10204,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9200,16 +10295,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -9265,6 +10371,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9295,6 +10402,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9357,6 +10465,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -9368,6 +10479,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -9377,12 +10490,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -9421,6 +10540,7 @@ spec: - conditionType type: object type: 
array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -9460,6 +10580,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -9498,6 +10627,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -9510,6 +10640,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -9551,6 +10682,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -9567,11 +10699,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -9660,6 +10794,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -9669,6 +10804,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -9686,6 +10822,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -9714,7 +10851,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -9729,6 +10868,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -9784,6 +10924,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -9825,6 +10966,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -9854,18 +10996,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - 
name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -9896,11 +11026,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -9909,6 +11041,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -9931,10 +11065,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -9951,6 +11087,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10031,11 +11168,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10097,6 +11236,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -10114,7 +11292,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10160,6 +11340,7 @@ spec: - path 
type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -10178,7 +11359,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10198,6 +11381,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -10229,6 +11413,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -10236,6 +11421,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10258,6 +11444,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10296,6 +11483,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -10310,6 +11498,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10335,6 +11524,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -10345,8 +11537,6 @@ spec: default: 0 format: int32 type: integer - required: - - entrypoint type: object status: properties: @@ -10355,6 +11545,10 @@ spec: endTime: format: date-time type: string + failed: + default: 0 + format: int32 + type: integer jobDeploymentStatus: type: string jobId: @@ -10373,6 +11567,45 @@ spec: availableWorkerReplicas: format: int32 type: integer + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + maxLength: 32768 + type: string + observedGeneration: + format: int64 + minimum: 0 + type: integer + reason: + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + enum: + - "True" + - "False" + - Unknown + type: string + type: + maxLength: 316 + pattern: 
^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map desiredCPU: anyOf: - type: integer @@ -10408,8 +11641,12 @@ spec: properties: podIP: type: string + podName: + type: string serviceIP: type: string + serviceName: + type: string type: object lastUpdateTime: format: date-time @@ -10424,16 +11661,28 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: type: string + stateTransitionTimes: + additionalProperties: + format: date-time + type: string + type: object type: object reason: type: string startTime: format: date-time type: string + succeeded: + default: 0 + format: int32 + type: integer type: object type: object served: true @@ -10488,6 +11737,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -10526,6 +11776,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -10544,6 +11795,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -10554,6 +11806,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -10603,16 +11856,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -10677,6 +11941,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -10733,6 +11999,7 @@ spec: items: 
type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -10757,6 +12024,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -10804,6 +12072,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -10857,6 +12127,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -10878,6 +12150,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -10933,11 +12206,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -10949,11 +12224,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -10964,6 +12241,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -10980,11 +12258,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -10996,14 +12276,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -11029,17 +12312,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: 
additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11053,11 +12348,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11068,6 +12365,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11081,6 +12379,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11097,17 +12396,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11121,11 +12432,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11136,12 +12449,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -11163,17 +12478,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + 
x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11187,11 +12514,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11202,6 +12531,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11215,6 +12545,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11231,17 +12562,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11255,11 +12598,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11270,12 +12615,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -11287,10 +12634,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic 
command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -11305,6 +12654,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11343,6 +12693,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11355,12 +12706,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -11371,6 +12726,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -11378,6 +12734,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -11392,6 +12749,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11409,6 +12767,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11421,6 +12780,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11442,6 +12809,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11459,6 +12827,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11471,6 +12840,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11493,6 +12870,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11523,6 +12901,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11597,6 +12976,7 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11627,6 +13007,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11717,16 +13098,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -11782,6 +13174,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11812,6 +13205,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11874,6 +13268,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -11885,6 +13282,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -11894,18 +13293,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -11915,10 +13321,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -11931,10 +13339,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: 
atomic env: items: properties: @@ -11949,6 +13359,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11987,6 +13398,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11999,12 +13411,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12015,6 +13431,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12022,6 +13439,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12036,6 +13454,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12053,6 +13472,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12065,6 +13485,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12086,6 +13514,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12103,6 +13532,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12115,6 +13545,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12137,6 +13575,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12167,6 +13606,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12241,6 +13681,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic 
type: object failureThreshold: format: int32 @@ -12271,6 +13712,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12361,16 +13803,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -12426,6 +13879,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12456,6 +13910,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12520,6 +13975,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -12531,6 +13989,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -12540,12 +14000,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -12553,10 +14019,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -12571,10 +14043,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -12582,10 +14058,12 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -12600,6 +14078,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12638,6 +14117,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12650,12 +14130,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12666,6 +14150,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12673,6 +14158,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12687,6 +14173,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12704,6 +14191,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12716,6 +14204,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12737,6 +14233,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12754,6 +14251,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12766,6 +14264,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12788,6 +14294,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12818,6 +14325,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -12892,6 +14400,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12922,6 +14431,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13012,16 +14522,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -13077,6 +14598,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13107,6 +14629,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13169,6 +14692,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -13180,6 +14706,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -13189,12 +14717,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -13233,6 +14767,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -13272,6 +14807,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: 
integer @@ -13310,6 +14854,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -13322,6 +14867,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -13363,6 +14909,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -13379,11 +14926,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -13472,6 +15021,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -13481,6 +15031,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13498,6 +15049,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13526,7 +15078,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13541,6 +15095,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13596,6 +15151,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -13637,6 +15193,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -13666,18 +15223,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -13708,11 +15253,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator 
type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -13721,6 +15268,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -13743,10 +15292,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -13763,6 +15314,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13843,11 +15395,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13909,6 +15463,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -13926,7 +15519,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13972,6 +15567,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -13990,7 +15586,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -14010,6 +15608,7 @@ spec: type: object type: object 
type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -14041,6 +15640,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -14048,6 +15648,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14070,6 +15671,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14108,6 +15710,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -14122,6 +15725,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14147,6 +15751,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -14237,11 +15844,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -14253,11 +15862,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -14268,6 +15879,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -14284,11 +15896,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -14300,14 +15914,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic 
required: - nodeSelectorTerms type: object @@ -14333,17 +15950,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -14357,11 +15986,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -14372,6 +16003,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -14385,6 +16017,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -14401,17 +16034,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -14425,11 +16070,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -14440,12 +16087,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: 
array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -14467,17 +16116,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -14491,11 +16152,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -14506,6 +16169,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -14519,6 +16183,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -14535,17 +16200,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -14559,11 +16236,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -14574,12 +16253,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string 
required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -14591,10 +16272,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -14609,6 +16292,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -14647,6 +16331,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -14659,12 +16344,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -14675,6 +16364,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -14682,6 +16372,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -14696,6 +16387,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -14713,6 +16405,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -14725,6 +16418,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -14746,6 +16447,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -14763,6 +16465,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -14775,6 +16478,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -14797,6 +16508,7 @@ spec: items: type: string type: 
array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -14827,6 +16539,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -14901,6 +16614,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -14931,6 +16645,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15021,16 +16736,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -15086,6 +16812,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -15116,6 +16843,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15178,6 +16906,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -15189,6 +16920,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -15198,18 +16931,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -15219,10 +16959,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array 
+ x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -15235,10 +16977,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -15253,6 +16997,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -15291,6 +17036,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -15303,12 +17049,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -15319,6 +17069,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -15326,6 +17077,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -15340,6 +17092,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -15357,6 +17110,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15369,6 +17123,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -15390,6 +17152,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -15407,6 +17170,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15419,6 +17183,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -15441,6 +17213,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: 
format: int32 @@ -15471,6 +17244,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15545,6 +17319,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -15575,6 +17350,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15665,16 +17441,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -15730,6 +17517,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -15760,6 +17548,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15824,6 +17613,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -15835,6 +17627,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -15844,12 +17638,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -15857,10 +17657,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -15875,10 +17681,14 @@ spec: items: properties: 
name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -15886,10 +17696,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -15904,6 +17716,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -15942,6 +17755,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -15954,12 +17768,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -15970,6 +17788,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -15977,6 +17796,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -15991,6 +17811,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -16008,6 +17829,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16020,6 +17842,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -16041,6 +17871,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -16058,6 +17889,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16070,6 +17902,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ 
-16092,6 +17932,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -16122,6 +17963,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16196,6 +18038,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -16226,6 +18069,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16316,16 +18160,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -16381,6 +18236,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -16411,6 +18267,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16473,6 +18330,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -16484,6 +18344,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -16493,12 +18355,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -16537,6 +18405,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -16576,6 +18445,15 @@ spec: 
x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -16614,6 +18492,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -16626,6 +18505,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -16667,6 +18547,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -16683,11 +18564,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -16776,6 +18659,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -16785,6 +18669,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -16802,6 +18687,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -16830,7 +18716,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -16845,6 +18733,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -16900,6 +18789,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -16941,6 +18831,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -16970,18 +18861,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - 
x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -17012,11 +18891,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -17025,6 +18906,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -17047,10 +18930,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -17067,6 +18952,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17147,11 +19033,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17213,6 +19101,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -17230,7 +19157,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -17276,6 +19205,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object 
secret: properties: @@ -17294,7 +19224,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -17314,6 +19246,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -17345,6 +19278,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -17352,6 +19286,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17374,6 +19309,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17412,6 +19348,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -17426,6 +19363,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17451,6 +19389,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -17516,11 +19457,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -17532,11 +19475,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -17547,6 +19492,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -17563,11 +19509,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -17579,14 +19527,17 @@ spec: items: type: 
string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -17612,17 +19563,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -17636,11 +19599,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -17651,6 +19616,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -17664,6 +19630,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -17680,17 +19647,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -17704,11 +19683,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: 
atomic matchLabels: additionalProperties: type: string @@ -17719,12 +19700,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -17746,17 +19729,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -17770,11 +19765,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -17785,6 +19782,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -17798,6 +19796,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -17814,17 +19813,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -17838,11 +19849,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object 
type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -17853,12 +19866,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -17870,10 +19885,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -17888,6 +19905,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -17926,6 +19944,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -17938,12 +19957,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -17954,6 +19977,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -17961,6 +19985,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -17975,6 +20000,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -17992,6 +20018,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18004,6 +20031,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -18025,6 +20060,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -18042,6 +20078,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18054,6 +20091,14 @@ spec: 
required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -18076,6 +20121,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18106,6 +20152,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18180,6 +20227,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18210,6 +20258,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18300,16 +20349,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -18365,6 +20425,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18395,6 +20456,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18457,6 +20519,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -18468,6 +20533,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -18477,18 +20544,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: 
string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -18498,10 +20572,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -18514,10 +20590,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -18532,6 +20610,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -18570,6 +20649,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -18582,12 +20662,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -18598,6 +20682,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -18605,6 +20690,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -18619,6 +20705,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -18636,6 +20723,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18648,6 +20736,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -18669,6 +20765,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -18686,6 +20783,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18698,6 +20796,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: 
+ format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -18720,6 +20826,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18750,6 +20857,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18824,6 +20932,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18854,6 +20963,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18944,16 +21054,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -19009,6 +21130,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -19039,6 +21161,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19103,6 +21226,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -19114,6 +21240,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -19123,12 +21251,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -19136,10 +21270,16 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -19154,10 +21294,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -19165,10 +21309,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -19183,6 +21329,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -19221,6 +21368,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -19233,12 +21381,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -19249,6 +21401,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -19256,6 +21409,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -19270,6 +21424,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -19287,6 +21442,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19299,6 +21455,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -19320,6 +21484,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -19337,6 +21502,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -19349,6 +21515,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -19371,6 +21545,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -19401,6 +21576,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19475,6 +21651,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -19505,6 +21682,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19595,16 +21773,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -19660,6 +21849,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -19690,6 +21880,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19752,6 +21943,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -19763,6 +21957,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -19772,12 +21968,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - 
name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -19816,6 +22018,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -19855,6 +22058,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -19893,6 +22105,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -19905,6 +22118,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -19946,6 +22160,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -19962,11 +22177,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -20055,6 +22272,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -20064,6 +22282,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20081,6 +22300,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20109,7 +22329,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -20124,6 +22346,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20179,6 +22402,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -20220,6 +22444,7 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -20249,18 +22474,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -20291,11 +22504,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -20304,6 +22519,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -20326,10 +22543,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -20346,6 +22565,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20426,11 +22646,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20492,6 +22714,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -20509,7 
+22770,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -20555,6 +22818,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -20573,7 +22837,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -20593,6 +22859,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -20624,6 +22891,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -20631,6 +22899,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20653,6 +22922,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20691,6 +22961,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -20705,6 +22976,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20730,6 +23002,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -20819,6 +23094,9 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml index b3f8b901858..9d0fd9628d7 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.13.0 + 
controller-gen.kubebuilder.io/version: v0.15.0 name: rayservices.ray.io spec: group: ray.io @@ -38,6 +38,8 @@ spec: deploymentUnhealthySecondThreshold: format: int32 type: integer + excludeHeadPodFromServeSvc: + type: boolean rayClusterConfig: properties: autoscalerOptions: @@ -56,6 +58,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -94,6 +97,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -112,6 +116,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -122,6 +127,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -171,16 +177,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -245,6 +262,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -257,6 +276,135 @@ spec: type: object enableInTreeAutoscaling: type: boolean + gcsFaultToleranceOptions: + properties: + externalStorageNamespace: + type: string + redisAddress: + type: string + redisPassword: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: 
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + redisUsername: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + required: + - redisAddress + type: object headGroupSpec: properties: enableIngress: @@ -301,6 +449,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -325,6 +474,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -372,6 +522,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -425,6 +577,8 @@ spec: type: string 
ip: type: string + ipMode: + type: string ports: items: properties: @@ -446,6 +600,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -501,11 +656,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -517,11 +674,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -532,6 +691,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -548,11 +708,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -564,14 +726,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -597,17 +762,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -621,11 +798,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object 
type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -636,6 +815,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -649,6 +829,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -665,17 +846,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -689,11 +882,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -704,12 +899,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -731,17 +928,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -755,11 +964,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: 
object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -770,6 +981,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -783,6 +995,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -799,17 +1012,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -823,11 +1048,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -838,12 +1065,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -855,10 +1084,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -873,6 +1104,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -911,6 +1143,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -923,12 +1156,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string 
optional: type: boolean @@ -939,6 +1176,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -946,6 +1184,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -960,6 +1199,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -977,6 +1217,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -989,6 +1230,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1010,6 +1259,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1027,6 +1277,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1039,6 +1290,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1061,6 +1320,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1091,6 +1351,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1165,6 +1426,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1195,6 +1457,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1285,16 +1548,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: 
string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -1350,6 +1624,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1380,6 +1655,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1442,6 +1718,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -1453,6 +1732,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -1462,18 +1743,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -1483,10 +1771,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -1499,10 +1789,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -1517,6 +1809,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1555,6 +1848,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1567,12 +1861,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -1583,6 +1881,7 @@ spec: secretRef: properties: name: + default: "" type: string 
optional: type: boolean @@ -1590,6 +1889,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -1604,6 +1904,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1621,6 +1922,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1633,6 +1935,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1654,6 +1964,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1671,6 +1982,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1683,6 +1995,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1705,6 +2025,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1735,6 +2056,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1809,6 +2131,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1839,6 +2162,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1929,16 +2253,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -1994,6 
+2329,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2024,6 +2360,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2088,6 +2425,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2099,6 +2439,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2108,12 +2450,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -2121,10 +2469,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -2139,10 +2493,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -2150,10 +2508,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -2168,6 +2528,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2206,6 +2567,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2218,12 +2580,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ 
-2234,6 +2600,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -2241,6 +2608,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -2255,6 +2623,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2272,6 +2641,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2284,6 +2654,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2305,6 +2683,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2322,6 +2701,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2334,6 +2714,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2356,6 +2744,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2386,6 +2775,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2460,6 +2850,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2490,6 +2881,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2580,16 +2972,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + 
x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2645,6 +3048,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2675,6 +3079,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2737,6 +3142,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2748,6 +3156,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2757,12 +3167,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -2801,6 +3217,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -2840,6 +3257,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -2878,6 +3304,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -2890,6 +3317,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -2931,6 +3359,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -2947,11 +3376,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3040,6 +3471,7 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -3049,6 +3481,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3066,6 +3499,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3094,7 +3528,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3109,6 +3545,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3164,6 +3601,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -3205,6 +3643,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -3234,18 +3673,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -3276,11 +3703,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3289,6 +3718,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -3311,10 +3742,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -3331,6 +3764,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3411,11 +3845,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic 
readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3477,6 +3913,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -3494,7 +3969,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3540,6 +4017,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -3558,7 +4036,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3578,6 +4058,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -3609,6 +4090,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -3616,6 +4098,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3638,6 +4121,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3676,6 +4160,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -3690,6 +4175,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ 
-3715,6 +4201,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -3727,6 +4216,14 @@ spec: additionalProperties: type: string type: object + managedBy: + type: string + x-kubernetes-validations: + - message: the managedBy field is immutable + rule: self == oldSelf + - message: the managedBy field value must be either 'ray.io/kuberay-operator' + or 'kueue.x-k8s.io/multikueue' + rule: self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue'] rayVersion: type: string suspend: @@ -3736,6 +4233,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 @@ -3763,6 +4263,8 @@ spec: type: string type: array type: object + suspend: + type: boolean template: properties: metadata: @@ -3809,11 +4311,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3825,11 +4329,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -3840,6 +4346,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -3856,11 +4363,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3872,14 +4381,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - 
nodeSelectorTerms type: object @@ -3905,17 +4417,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -3929,11 +4453,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3944,6 +4470,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -3957,6 +4484,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -3973,17 +4501,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -3997,11 +4537,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4012,12 +4554,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + 
x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -4039,17 +4583,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4063,11 +4619,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4078,6 +4636,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -4091,6 +4650,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -4107,17 +4667,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4131,11 +4703,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4146,12 +4720,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: 
object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -4163,10 +4739,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4181,6 +4759,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4219,6 +4798,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4231,12 +4811,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4247,6 +4831,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4254,6 +4839,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4268,6 +4854,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4285,6 +4872,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4297,6 +4885,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4318,6 +4914,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4335,6 +4932,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4347,6 +4945,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4369,6 +4975,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object 
failureThreshold: format: int32 @@ -4399,6 +5006,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4473,6 +5081,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4503,6 +5112,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4593,16 +5203,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -4658,6 +5279,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4688,6 +5310,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4750,6 +5373,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -4761,6 +5387,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -4770,18 +5398,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -4791,10 +5426,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string 
@@ -4807,10 +5444,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4825,6 +5464,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4863,6 +5503,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4875,12 +5516,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4891,6 +5536,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4898,6 +5544,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4912,6 +5559,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4929,6 +5577,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4941,6 +5590,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4962,6 +5619,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4979,6 +5637,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4991,6 +5650,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5013,6 +5680,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5043,6 +5711,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -5117,6 +5786,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5147,6 +5817,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5237,16 +5908,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5302,6 +5984,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5332,6 +6015,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5396,6 +6080,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -5407,6 +6094,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -5416,12 +6105,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -5429,10 +6124,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -5447,10 +6148,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + 
x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -5458,10 +6163,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -5476,6 +6183,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5514,6 +6222,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5526,12 +6235,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -5542,6 +6255,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -5549,6 +6263,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -5563,6 +6278,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5580,6 +6296,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5592,6 +6309,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5613,6 +6338,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5630,6 +6356,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5642,6 +6369,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5664,6 +6399,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object 
failureThreshold: format: int32 @@ -5694,6 +6430,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5768,6 +6505,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5798,6 +6536,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5888,16 +6627,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5953,6 +6703,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5983,6 +6734,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -6045,6 +6797,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -6056,6 +6811,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -6065,12 +6822,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -6109,6 +6872,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -6148,6 +6912,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: 
string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -6186,6 +6959,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -6198,6 +6972,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -6239,6 +7014,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -6255,11 +7031,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6348,6 +7126,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -6357,6 +7136,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6374,6 +7154,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6402,7 +7183,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6417,6 +7200,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6472,6 +7256,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -6513,6 +7298,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -6542,18 +7328,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -6584,11 +7358,13 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6597,6 +7373,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -6619,10 +7397,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -6639,6 +7419,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6719,11 +7500,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6785,6 +7568,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -6802,7 +7624,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6848,6 +7672,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -6866,7 +7691,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6886,6 
+7713,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -6917,6 +7745,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -6924,6 +7753,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6946,6 +7776,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6984,6 +7815,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -6998,6 +7830,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7023,6 +7856,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -7080,6 +7916,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -7104,6 +7941,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -7151,6 +7989,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -7204,6 +8044,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -7225,12 +8067,18 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object serviceUnhealthySecondThreshold: format: int32 type: integer + upgradeStrategy: + properties: + type: + type: string + type: object type: object status: properties: @@ -7267,6 +8115,45 @@ spec: availableWorkerReplicas: format: int32 type: integer + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + maxLength: 32768 + type: string + observedGeneration: + 
format: int64 + minimum: 0 + type: integer + reason: + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + enum: + - "True" + - "False" + - Unknown + type: string + type: + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map desiredCPU: anyOf: - type: integer @@ -7302,8 +8189,12 @@ spec: properties: podIP: type: string + podName: + type: string serviceIP: type: string + serviceName: + type: string type: object lastUpdateTime: format: date-time @@ -7318,10 +8209,18 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: type: string + stateTransitionTimes: + additionalProperties: + format: date-time + type: string + type: object type: object type: object lastUpdateTime: @@ -7366,6 +8265,45 @@ spec: availableWorkerReplicas: format: int32 type: integer + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + maxLength: 32768 + type: string + observedGeneration: + format: int64 + minimum: 0 + type: integer + reason: + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + enum: + - "True" + - "False" + - Unknown + type: string + type: + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map desiredCPU: anyOf: - type: integer @@ -7401,8 +8339,12 @@ spec: properties: podIP: type: string + 
podName: + type: string serviceIP: type: string + serviceName: + type: string type: object lastUpdateTime: format: date-time @@ -7417,10 +8359,18 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: type: string + stateTransitionTimes: + additionalProperties: + format: date-time + type: string + type: object type: object type: object serviceStatus: @@ -7464,6 +8414,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7502,6 +8453,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7520,6 +8472,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -7530,6 +8483,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -7579,16 +8533,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -7653,6 +8618,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -7709,6 +8676,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -7733,6 +8701,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -7780,6 +8749,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -7833,6 +8804,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -7854,6 +8827,7 @@ spec: x-kubernetes-list-type: atomic type: object 
type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -7909,11 +8883,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7925,11 +8901,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -7940,6 +8918,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -7956,11 +8935,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7972,14 +8953,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -8005,17 +8989,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -8029,11 +9025,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -8044,6 +9042,7 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -8057,6 +9056,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -8073,17 +9073,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -8097,11 +9109,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -8112,12 +9126,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -8139,17 +9155,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -8163,11 +9191,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ 
-8178,6 +9208,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -8191,6 +9222,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -8207,17 +9239,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -8231,11 +9275,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -8246,12 +9292,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -8263,10 +9311,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8281,6 +9331,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8319,6 +9370,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8331,12 +9383,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8347,6 +9403,7 @@ spec: secretRef: properties: name: + default: 
"" type: string optional: type: boolean @@ -8354,6 +9411,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8368,6 +9426,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8385,6 +9444,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8397,6 +9457,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8418,6 +9486,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8435,6 +9504,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8447,6 +9517,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8469,6 +9547,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8499,6 +9578,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8573,6 +9653,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8603,6 +9684,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8693,16 +9775,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean 
@@ -8758,6 +9851,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8788,6 +9882,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8850,6 +9945,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -8861,6 +9959,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -8870,18 +9970,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -8891,10 +9998,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -8907,10 +10016,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8925,6 +10036,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8963,6 +10075,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8975,12 +10088,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8991,6 +10108,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8998,6 +10116,7 @@ spec: x-kubernetes-map-type: atomic type: 
object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -9012,6 +10131,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9029,6 +10149,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9041,6 +10162,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9062,6 +10191,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9079,6 +10209,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9091,6 +10222,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9113,6 +10252,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9143,6 +10283,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9217,6 +10358,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9247,6 +10389,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9337,16 +10480,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -9402,6 +10556,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: 
atomic type: object failureThreshold: format: int32 @@ -9432,6 +10587,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9496,6 +10652,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -9507,6 +10666,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -9516,12 +10677,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -9529,10 +10696,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -9547,10 +10720,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -9558,10 +10735,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -9576,6 +10755,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -9614,6 +10794,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -9626,12 +10807,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -9642,6 +10827,7 @@ spec: secretRef: properties: name: + default: 
"" type: string optional: type: boolean @@ -9649,6 +10835,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -9663,6 +10850,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9680,6 +10868,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9692,6 +10881,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9713,6 +10910,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9730,6 +10928,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9742,6 +10941,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9764,6 +10971,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9794,6 +11002,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9868,6 +11077,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9898,6 +11108,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9988,16 +11199,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: 
type: boolean @@ -10053,6 +11275,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -10083,6 +11306,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -10145,6 +11369,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -10156,6 +11383,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -10165,12 +11394,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -10209,6 +11444,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -10248,6 +11484,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -10286,6 +11531,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -10298,6 +11544,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -10339,6 +11586,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -10355,11 +11603,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -10448,6 +11698,7 @@ spec: items: type: string type: array 
+ x-kubernetes-list-type: atomic path: type: string readOnly: @@ -10457,6 +11708,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10474,6 +11726,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10502,7 +11755,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10517,6 +11772,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10572,6 +11828,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -10613,6 +11870,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -10642,18 +11900,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -10684,11 +11930,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -10697,6 +11945,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -10719,10 +11969,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -10739,6 +11991,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10819,11 +12072,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: 
boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10885,6 +12140,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -10902,7 +12196,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10948,6 +12244,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -10966,7 +12263,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10986,6 +12285,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -11017,6 +12317,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -11024,6 +12325,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -11046,6 +12348,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -11084,6 +12387,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -11098,6 +12402,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ 
-11123,6 +12428,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -11213,11 +12521,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -11229,11 +12539,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -11244,6 +12556,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -11260,11 +12573,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -11276,14 +12591,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -11309,17 +12627,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11333,11 +12663,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + 
x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11348,6 +12680,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11361,6 +12694,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11377,17 +12711,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11401,11 +12747,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11416,12 +12764,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -11443,17 +12793,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11467,11 +12829,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - 
operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11482,6 +12846,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11495,6 +12860,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11511,17 +12877,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11535,11 +12913,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11550,12 +12930,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -11567,10 +12949,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -11585,6 +12969,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11623,6 +13008,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11635,12 +13021,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: 
properties: name: + default: "" type: string optional: type: boolean @@ -11651,6 +13041,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -11658,6 +13049,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -11672,6 +13064,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11689,6 +13082,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11701,6 +13095,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11722,6 +13124,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11739,6 +13142,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11751,6 +13155,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11773,6 +13185,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11803,6 +13216,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11877,6 +13291,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11907,6 +13322,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11997,16 +13413,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: 
type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -12062,6 +13489,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12092,6 +13520,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12154,6 +13583,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -12165,6 +13597,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -12174,18 +13608,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -12195,10 +13636,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -12211,10 +13654,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -12229,6 +13674,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12267,6 +13713,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12279,12 +13726,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: 
boolean @@ -12295,6 +13746,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12302,6 +13754,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12316,6 +13769,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12333,6 +13787,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12345,6 +13800,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12366,6 +13829,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12383,6 +13847,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12395,6 +13860,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12417,6 +13890,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12447,6 +13921,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12521,6 +13996,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12551,6 +14027,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12641,16 +14118,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: 
items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -12706,6 +14194,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12736,6 +14225,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12800,6 +14290,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -12811,6 +14304,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -12820,12 +14315,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -12833,10 +14334,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -12851,10 +14358,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -12862,10 +14373,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -12880,6 +14393,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12918,6 +14432,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12930,12 +14445,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + 
x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12946,6 +14465,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12953,6 +14473,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12967,6 +14488,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12984,6 +14506,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12996,6 +14519,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -13017,6 +14548,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -13034,6 +14566,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13046,6 +14579,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -13068,6 +14609,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13098,6 +14640,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13172,6 +14715,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13202,6 +14746,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13292,16 +14837,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + 
required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -13357,6 +14913,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13387,6 +14944,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13449,6 +15007,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -13460,6 +15021,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -13469,12 +15032,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -13513,6 +15082,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -13552,6 +15122,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -13590,6 +15169,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -13602,6 +15182,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -13643,6 +15224,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -13659,11 +15241,13 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -13752,6 +15336,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -13761,6 +15346,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13778,6 +15364,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13806,7 +15393,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13821,6 +15410,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13876,6 +15466,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -13917,6 +15508,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -13946,18 +15538,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -13988,11 +15568,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -14001,6 +15583,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -14023,10 +15607,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -14043,6 
+15629,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14123,11 +15710,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14189,6 +15778,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -14206,7 +15834,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -14252,6 +15882,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -14270,7 +15901,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -14290,6 +15923,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -14321,6 +15955,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -14328,6 +15963,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14350,6 +15986,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14388,6 +16025,7 @@ spec: - 
path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -14402,6 +16040,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14427,6 +16066,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -14484,6 +16126,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -14508,6 +16151,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -14555,6 +16199,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -14608,6 +16254,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -14629,6 +16277,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -14722,6 +16371,9 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: @@ -14818,6 +16470,9 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: diff --git a/helm-chart/kuberay-operator/templates/_helpers.tpl b/helm-chart/kuberay-operator/templates/_helpers.tpl index 040cdd9e44a..05b4c55631a 100644 --- a/helm-chart/kuberay-operator/templates/_helpers.tpl +++ b/helm-chart/kuberay-operator/templates/_helpers.tpl @@ -54,3 +54,301 @@ Create the name of the service account to use {{ default "default" .Values.serviceAccount.name }} {{- end -}} {{- end -}} + + +{{/* +FeatureGates +*/}} +{{- define "kuberay.featureGates" -}} +{{- $features := "" }} +{{- range .Values.featureGates }} + {{- $str := printf "%s=%t," .name .enabled }} + {{- $features = 
print $features $str }} +{{- end }} +{{- with .Values.featureGates }} +--feature-gates={{ $features | trimSuffix "," }} +{{- end }} +{{- end }} + + +{{/* +Create a template to ensure consistency for Role and ClusterRole. +*/}} +{{- define "role.consistentRules" -}} +rules: +- apiGroups: + - batch + resources: + - jobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - list + - update +- apiGroups: + - "" + resources: + - endpoints + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - pods/proxy + verbs: + - get + - patch + - update +- apiGroups: + - "" + resources: + - pods/status + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - delete + - get + - list + - watch +- apiGroups: + - "" + resources: + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - services/proxy + verbs: + - create + - get + - patch + - update +- apiGroups: + - "" + resources: + - services/status + verbs: + - get + - patch + - update +- apiGroups: + - extensions + resources: + - ingresses + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - networking.k8s.io + resources: + - ingressclasses + verbs: + - get + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ray.io + resources: + - rayclusters + verbs: + - create + - delete + - get + - list + - 
patch + - update + - watch +- apiGroups: + - ray.io + resources: + - rayclusters/finalizers + verbs: + - update +- apiGroups: + - ray.io + resources: + - rayclusters/status + verbs: + - get + - patch + - update +- apiGroups: + - ray.io + resources: + - rayjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ray.io + resources: + - rayjobs/finalizers + verbs: + - update +- apiGroups: + - ray.io + resources: + - rayjobs/status + verbs: + - get + - patch + - update +- apiGroups: + - ray.io + resources: + - rayservices + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ray.io + resources: + - rayservices/finalizers + verbs: + - update +- apiGroups: + - ray.io + resources: + - rayservices/status + verbs: + - get + - patch + - update +- apiGroups: + - rbac.authorization.k8s.io + resources: + - rolebindings + verbs: + - create + - delete + - get + - list + - watch +- apiGroups: + - rbac.authorization.k8s.io + resources: + - roles + verbs: + - create + - delete + - get + - list + - update + - watch +- apiGroups: + - route.openshift.io + resources: + - routes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +{{- if or .batchSchedulerEnabled (eq .batchSchedulerName "volcano") }} +- apiGroups: + - scheduling.volcano.sh + resources: + - podgroups + verbs: + - create + - delete + - get + - list + - update + - watch +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - get +{{- end -}} +{{- end -}} diff --git a/helm-chart/kuberay-operator/templates/deployment.yaml b/helm-chart/kuberay-operator/templates/deployment.yaml index 1ce36468727..9c15f90ba72 100644 --- a/helm-chart/kuberay-operator/templates/deployment.yaml +++ b/helm-chart/kuberay-operator/templates/deployment.yaml @@ -37,7 +37,12 @@ spec: {{- if and (.Values.logging.baseDir) (.Values.logging.fileName) }} volumes: - name: kuberay-logs + {{- if 
.Values.logging.sizeLimit }} + emptyDir: + sizeLimit: {{ .Values.logging.sizeLimit }} + {{- else }} emptyDir: {} + {{- end }} {{- end }} securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} @@ -53,12 +58,18 @@ spec: mountPath: "{{ .Values.logging.baseDir }}" {{- end }} command: - - /manager + - {{ .Values.operatorComand }} args: {{- $argList := list -}} + {{- $argList = append $argList (include "kuberay.featureGates" . | trim) -}} + {{- if .Values.batchScheduler -}} {{- if .Values.batchScheduler.enabled -}} {{- $argList = append $argList "--enable-batch-scheduler" -}} {{- end -}} + {{- if .Values.batchScheduler.name -}} + {{- $argList = append $argList (printf "--batch-scheduler=%s" .Values.batchScheduler.name) -}} + {{- end -}} + {{- end -}} {{- $watchNamespace := "" -}} {{- if and .Values.singleNamespaceInstall (not .Values.watchNamespace) -}} {{- $watchNamespace = .Release.Namespace -}} @@ -81,13 +92,19 @@ spec: {{- $argList = append $argList "--log-file-encoder" -}} {{- $argList = append $argList .Values.logging.fileEncoder -}} {{- end -}} + {{- if hasKey .Values "useKubernetesProxy" -}} + {{- $argList = append $argList (printf "--use-kubernetes-proxy=%t" .Values.useKubernetesProxy) -}} + {{- end -}} + {{- if hasKey .Values "leaderElectionEnabled" -}} + {{- $argList = append $argList (printf "--enable-leader-election=%t" .Values.leaderElectionEnabled) -}} + {{- end -}} {{- (printf "\n") -}} {{- $argList | toYaml | indent 12 }} ports: - name: http containerPort: 8080 protocol: TCP - env: + env: {{- toYaml .Values.env | nindent 12}} livenessProbe: httpGet: diff --git a/helm-chart/kuberay-operator/templates/leader_election_role.yaml b/helm-chart/kuberay-operator/templates/leader_election_role.yaml index 9049e4d4b76..134fdebef47 100644 --- a/helm-chart/kuberay-operator/templates/leader_election_role.yaml +++ b/helm-chart/kuberay-operator/templates/leader_election_role.yaml @@ -17,14 +17,6 @@ rules: - update - patch - delete -- apiGroups: - - "" - 
resources: - - configmaps/status - verbs: - - get - - update - - patch - apiGroups: - "" resources: diff --git a/helm-chart/kuberay-operator/templates/multiple_namespaces_role.yaml b/helm-chart/kuberay-operator/templates/multiple_namespaces_role.yaml index 4925893bce6..408558dc820 100644 --- a/helm-chart/kuberay-operator/templates/multiple_namespaces_role.yaml +++ b/helm-chart/kuberay-operator/templates/multiple_namespaces_role.yaml @@ -10,250 +10,6 @@ metadata: labels: {{ include "kuberay-operator.labels" $ | nindent 4 }} name: {{ include "kuberay-operator.fullname" $ }} namespace: {{ $namespace }} -rules: -- apiGroups: - - batch - resources: - - jobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - create - - get - - list - - update -- apiGroups: - - "" - resources: - - endpoints - verbs: - - get - - list -- apiGroups: - - "" - resources: - - events - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - pods - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - pods/status - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - serviceaccounts - verbs: - - create - - delete - - get - - list - - watch -- apiGroups: - - "" - resources: - - services - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - services/status - verbs: - - get - - patch - - update -- apiGroups: - - extensions - resources: - - ingresses - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - networking.k8s.io - resources: - - ingressclasses - verbs: - - get - - list - - watch -- apiGroups: - - networking.k8s.io - resources: - - ingresses - verbs: - - create - - delete - - get - - list - - patch - - update 
- - watch -- apiGroups: - - ray.io - resources: - - rayclusters - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - ray.io - resources: - - rayclusters/finalizers - verbs: - - update -- apiGroups: - - ray.io - resources: - - rayclusters/status - verbs: - - get - - patch - - update -- apiGroups: - - ray.io - resources: - - rayjobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - ray.io - resources: - - rayjobs/finalizers - verbs: - - update -- apiGroups: - - ray.io - resources: - - rayjobs/status - verbs: - - get - - patch - - update -- apiGroups: - - ray.io - resources: - - rayservices - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - ray.io - resources: - - rayservices/finalizers - verbs: - - update -- apiGroups: - - ray.io - resources: - - rayservices/status - verbs: - - get - - patch - - update -- apiGroups: - - rbac.authorization.k8s.io - resources: - - rolebindings - verbs: - - create - - delete - - get - - list - - watch -- apiGroups: - - rbac.authorization.k8s.io - resources: - - roles - verbs: - - create - - delete - - get - - list - - update - - watch -{{- if $.Values.batchScheduler.enabled }} -- apiGroups: - - scheduling.volcano.sh - resources: - - podgroups - verbs: - - create - - delete - - get - - list - - update - - watch -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - get -{{- end }} +{{ include "role.consistentRules" (dict "batchSchedulerEnabled" $.Values.batchScheduler.enabled "batchSchedulerName" $.Values.batchScheduler.name) }} {{- end }} {{- end }} diff --git a/helm-chart/kuberay-operator/templates/ray_rayservice_editor_role.yaml b/helm-chart/kuberay-operator/templates/ray_rayservice_editor_role.yaml index 2e2c0fa4401..1c0a695b592 100644 --- a/helm-chart/kuberay-operator/templates/ray_rayservice_editor_role.yaml +++ b/helm-chart/kuberay-operator/templates/ray_rayservice_editor_role.yaml @@ -23,4 +23,4 @@ rules:
- rayservices/status verbs: - get -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm-chart/kuberay-operator/templates/ray_rayservice_viewer_role.yaml b/helm-chart/kuberay-operator/templates/ray_rayservice_viewer_role.yaml index 9641dbb83d9..59e878feded 100644 --- a/helm-chart/kuberay-operator/templates/ray_rayservice_viewer_role.yaml +++ b/helm-chart/kuberay-operator/templates/ray_rayservice_viewer_role.yaml @@ -19,4 +19,4 @@ rules: - rayservices/status verbs: - get -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm-chart/kuberay-operator/templates/role.yaml b/helm-chart/kuberay-operator/templates/role.yaml index cc1e3bf3752..7ed91e2783f 100644 --- a/helm-chart/kuberay-operator/templates/role.yaml +++ b/helm-chart/kuberay-operator/templates/role.yaml @@ -6,262 +6,5 @@ metadata: labels: {{ include "kuberay-operator.labels" . | indent 4 }} name: {{ include "kuberay-operator.fullname" . }} -rules: -- apiGroups: - - batch - resources: - - jobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - create - - get - - list - - update -- apiGroups: - - "" - resources: - - endpoints - verbs: - - get - - list -- apiGroups: - - "" - resources: - - events - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - pods - verbs: - - create - - delete - - deletecollection - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - pods/status - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - serviceaccounts - verbs: - - create - - delete - - get - - list - - watch -- apiGroups: - - "" - resources: - - services - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - services/status - verbs: - - get - - patch - - update -- apiGroups: - - 
extensions - resources: - - ingresses - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - networking.k8s.io - resources: - - ingressclasses - verbs: - - get - - list - - watch -- apiGroups: - - networking.k8s.io - resources: - - ingresses - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - ray.io - resources: - - rayclusters - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - ray.io - resources: - - rayclusters/finalizers - verbs: - - update -- apiGroups: - - ray.io - resources: - - rayclusters/status - verbs: - - get - - patch - - update -- apiGroups: - - ray.io - resources: - - rayjobs - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - ray.io - resources: - - rayjobs/finalizers - verbs: - - update -- apiGroups: - - ray.io - resources: - - rayjobs/status - verbs: - - get - - patch - - update -- apiGroups: - - ray.io - resources: - - rayservices - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - ray.io - resources: - - rayservices/finalizers - verbs: - - update -- apiGroups: - - ray.io - resources: - - rayservices/status - verbs: - - get - - patch - - update -- apiGroups: - - rbac.authorization.k8s.io - resources: - - rolebindings - verbs: - - create - - delete - - get - - list - - watch -- apiGroups: - - rbac.authorization.k8s.io - resources: - - roles - verbs: - - create - - delete - - get - - list - - update - - watch -- apiGroups: - - route.openshift.io - resources: - - routes - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -{{- if .Values.batchScheduler.enabled }} -- apiGroups: - - scheduling.volcano.sh - resources: - - podgroups - verbs: - - create - - delete - - get - - list - - update - - watch -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - get -{{- end }} 
+{{ include "role.consistentRules" (dict "batchSchedulerEnabled" .Values.batchScheduler.enabled "batchSchedulerName" .Values.batchScheduler.name) }} {{- end }} diff --git a/helm-chart/kuberay-operator/values.yaml b/helm-chart/kuberay-operator/values.yaml index a86c8901a21..803a7aeda6b 100644 --- a/helm-chart/kuberay-operator/values.yaml +++ b/helm-chart/kuberay-operator/values.yaml @@ -44,6 +44,8 @@ logging: baseDir: "" # File name for kuberay-operator log file fileName: "" + # EmptyDir volume size limit for kuberay-operator log file + sizeLimit: "" livenessProbe: initialDelaySeconds: 10 @@ -55,11 +57,49 @@ readinessProbe: periodSeconds: 5 failureThreshold: 5 +# Enable customized Kubernetes scheduler integration. If enabled, Ray workloads will be scheduled +# by the customized scheduler. +# * "enabled" is the legacy option and will be deprecated soon. +# * "name" is the standard option, expecting a scheduler name, supported values are +# "default", "volcano", and "yunikorn". +# +# Note: "enabled" and "name" should not be set at the same time. If both are set, an error will be thrown. +# +# Examples: +# 1. Use volcano (deprecated) +# batchScheduler: +# enabled: true +# +# 2. Use volcano +# batchScheduler: +# name: volcano +# +# 3. Use yunikorn +# batchScheduler: +# name: yunikorn +# batchScheduler: + # Deprecated. This option will be removed in the future. + # Note, for backwards compatibility. When it sets to true, it enables volcano scheduler integration. enabled: false + # Set the customized scheduler name, supported values are "volcano" or "yunikorn", do not set + # "batchScheduler.enabled=true" at the same time as it will override this option. + name: "" + +featureGates: + - name: RayClusterStatusConditions + enabled: true + - name: RayJobDeletionPolicy + enabled: false + +# Path to the operator binary +operatorComand: /manager # Set up `securityContext` to improve Pod security. 
# See https://github.com/ray-project/kuberay/blob/master/docs/guidance/pod-security.md for further guidance. +podSecurityContext: {} + +# Set up `securityContext` to improve container security. securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true @@ -70,6 +110,12 @@ securityContext: seccompProfile: type: RuntimeDefault +# If useKubernetesProxy is set to true, the KubeRay operator will be configured with the --use-kubernetes-proxy flag. +# Use this option to configure kuberay-operator to communicate with Ray head pods by proxying through the Kubernetes API Server. +# useKubernetesProxy: true + +# If leaderElectionEnabled is set to true, the KubeRay operator will use leader election for high availability. +leaderElectionEnabled: true # If rbacEnable is set to false, no RBAC resources will be created, including the Role for leader election, the Role for Pods and Services, and so on. rbacEnable: true @@ -103,6 +149,9 @@ env: # Warning: we highly recommend setting to true and let kuberay handle for you. # - name: ENABLE_INIT_CONTAINER_INJECTION # value: "true" +# If set to true, kuberay creates a normal ClusterIP service for a Ray Head instead of a Headless service. Defaults to false. +# - name: ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE +# value: "false" # If not set or set to "", kuberay will pick up the default k8s cluster domain `cluster.local` # Otherwise, kuberay will use your custom domain # - name: CLUSTER_DOMAIN @@ -128,3 +177,6 @@ env: # Enabling this feature contributes to the robustness of Ray clusters. # - name: ENABLE_PROBES_INJECTION # value: "true" +# If set to true, the RayJob CR itself will be deleted if shutdownAfterJobFinishes is set to true. Note that all resources created by the RayJob CR will be deleted, including the K8s Job. Otherwise, only the RayCluster CR will be deleted. Default is false.
+# - name: DELETE_RAYJOB_CR_AFTER_JOB_FINISHES +# value: "false" diff --git a/helm-chart/ray-cluster/README.md b/helm-chart/ray-cluster/README.md index 9a77dd95b58..88e4e497677 100644 --- a/helm-chart/ray-cluster/README.md +++ b/helm-chart/ray-cluster/README.md @@ -10,7 +10,7 @@ See [kuberay-operator/README.md](https://github.com/ray-project/kuberay/blob/mas ## End-to-end example ```sh -# Step 1: Create a KinD cluster +# Step 1: Create a KinD cluster kind create cluster # Step 2: Register a Helm chart repo @@ -26,7 +26,7 @@ helm install raycluster kuberay/ray-cluster --version 1.1.0 # See here for all available arm64 images: https://hub.docker.com/r/rayproject/ray/tags?page=1&name=aarch64 helm install raycluster kuberay/ray-cluster --version 1.1.0 --set image.tag=nightly-aarch64 -# Step 5: Verify the installation of KubeRay operator and RayCluster +# Step 5: Verify the installation of KubeRay operator and RayCluster kubectl get pods # NAME READY STATUS RESTARTS AGE # kuberay-operator-6fcbb94f64-gkpc9 1/1 Running 0 89s diff --git a/helm-chart/ray-cluster/templates/raycluster-cluster.yaml b/helm-chart/ray-cluster/templates/raycluster-cluster.yaml index ee1781cb45f..61bb04e8b47 100644 --- a/helm-chart/ray-cluster/templates/raycluster-cluster.yaml +++ b/helm-chart/ray-cluster/templates/raycluster-cluster.yaml @@ -38,10 +38,16 @@ spec: {{- end }} template: spec: + {{- if .Values.head.dnsConfig }} + dnsConfig: {{- toYaml .Values.head.dnsConfig | nindent 10 }} + {{- end }} imagePullSecrets: {{- toYaml .Values.imagePullSecrets | nindent 10 }} {{- if .Values.head.serviceAccountName }} serviceAccountName: {{ .Values.head.serviceAccountName }} {{- end }} + {{- if .Values.head.restartPolicy }} + restartPolicy: {{ .Values.head.restartPolicy }} + {{- end }} {{- if .Values.head.initContainers }} initContainers: {{- toYaml .Values.head.initContainers | nindent 10 }} {{- end }} @@ -60,11 +66,8 @@ spec: resources: {{- toYaml .Values.head.resources | nindent 14 }} securityContext: 
{{- toYaml .Values.head.securityContext | nindent 14 }} + {{- with concat .Values.common.containerEnv .Values.head.containerEnv }} env: - {{- with .Values.common.containerEnv }} - {{- toYaml . | nindent 14 }} - {{- end }} - {{- with .Values.head.containerEnv }} {{- toYaml . | nindent 14 }} {{- end }} {{- with .Values.head.envFrom }} @@ -89,9 +92,22 @@ spec: {{ if .Values.head.volumes }} volumes: {{- toYaml .Values.head.volumes | nindent 10 }} {{- end }} + {{- if .Values.head.topologySpreadConstraints.enabled }} + topologySpreadConstraints: {{- toYaml .Values.head.topologySpreadConstraints | nindent 10 }} + {{- end }} affinity: {{- toYaml .Values.head.affinity | nindent 10 }} + {{ if .Values.head.priorityClassName }} + priorityClassName: {{- toYaml .Values.head.priorityClassName | nindent 10 }} + {{- end }} + {{ if .Values.head.priority }} + priority: {{- toYaml .Values.head.priority | nindent 10 }} + {{- end }} tolerations: {{- toYaml .Values.head.tolerations | nindent 10 }} nodeSelector: {{- toYaml .Values.head.nodeSelector | nindent 10 }} + {{- with .Values.head.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} metadata: annotations: {{- toYaml .Values.head.annotations | nindent 10 }} {{- if .Values.head.labels }} @@ -99,7 +115,7 @@ spec: {{ include "ray-cluster.labels" . | indent 10 }} {{ else }} labels: {{ include "ray-cluster.labels" . 
| nindent 10 }} - {{- end }} + {{- end }} workerGroupSpecs: {{- range $groupName, $values := .Values.additionalWorkerGroups }} @@ -125,10 +141,16 @@ spec: groupName: {{ $groupName }} template: spec: + {{- if $values.dnsConfig }} + dnsConfig: {{- toYaml $values.dnsConfig | nindent 10 }} + {{- end }} imagePullSecrets: {{- toYaml $.Values.imagePullSecrets | nindent 10 }} {{- if $values.serviceAccountName }} serviceAccountName: {{ $values.serviceAccountName }} {{- end }} + {{- if $values.restartPolicy }} + restartPolicy: {{ $values.restartPolicy }} + {{- end }} {{- if $values.initContainers }} initContainers: {{- toYaml $values.initContainers | nindent 10 }} {{- end }} @@ -147,17 +169,16 @@ spec: resources: {{- toYaml $values.resources | nindent 14 }} securityContext: {{- toYaml $values.securityContext | nindent 14 }} + {{- with concat $.Values.common.containerEnv ($values.containerEnv | default list) }} env: - {{- with $.Values.common.containerEnv }} {{- toYaml . | nindent 14 }} {{- end }} - {{- with $values.containerEnv }} - {{- toYaml . 
| nindent 14}} - {{- end }} {{- if $values.envFrom }} envFrom: {{- toYaml $values.envFrom | nindent 14 }} {{- end }} + {{- if $values.ports }} ports: {{- toYaml $values.ports | nindent 14}} + {{- end }} {{- if $values.lifecycle }} lifecycle: {{- toYaml $values.lifecycle | nindent 14 }} @@ -174,9 +195,22 @@ spec: {{ if $values.volumes }} volumes: {{- toYaml $values.volumes | nindent 10 }} {{- end }} + {{- if $values.topologySpreadConstraints }} + topologySpreadConstraints: {{- toYaml $values.topologySpreadConstraints | nindent 10 }} + {{- end }} affinity: {{- toYaml $values.affinity | nindent 10 }} + {{ if $values.priorityClassName }} + priorityClassName: {{- toYaml $values.priorityClassName | nindent 10 }} + {{- end }} + {{ if $values.priority }} + priority: {{- toYaml $values.priority | nindent 10 }} + {{- end }} tolerations: {{- toYaml $values.tolerations | nindent 10 }} nodeSelector: {{- toYaml $values.nodeSelector | nindent 10 }} + {{- with $values.podSecurityContext }} + securityContext: + {{- toYaml . 
| nindent 10 }} + {{- end }} metadata: annotations: {{- toYaml $values.annotations | nindent 10 }} {{- if $values.labels }} @@ -184,7 +218,7 @@ spec: {{ include "ray-cluster.labels" $ | indent 10 }} {{ else }} labels: {{ include "ray-cluster.labels" $ | nindent 10 }} - {{- end }} + {{- end }} {{- end }} {{- end }} @@ -210,10 +244,16 @@ spec: groupName: {{ .Values.worker.groupName }} template: spec: + {{- if .Values.worker.dnsConfig }} + dnsConfig: {{- toYaml .Values.worker.dnsConfig | nindent 10 }} + {{- end }} imagePullSecrets: {{- toYaml .Values.imagePullSecrets | nindent 10 }} {{- if .Values.worker.serviceAccountName }} serviceAccountName: {{ .Values.worker.serviceAccountName }} {{- end }} + {{- if .Values.worker.restartPolicy }} + restartPolicy: {{ .Values.worker.restartPolicy }} + {{- end }} {{- if .Values.worker.initContainers }} initContainers: {{- toYaml .Values.worker.initContainers | nindent 10 }} {{- end }} @@ -232,17 +272,16 @@ spec: resources: {{- toYaml .Values.worker.resources | nindent 14 }} securityContext: {{- toYaml .Values.worker.securityContext | nindent 14 }} + {{- with concat .Values.common.containerEnv .Values.worker.containerEnv }} env: - {{- with .Values.common.containerEnv }} {{- toYaml . | nindent 14 }} {{- end }} - {{- with .Values.worker.containerEnv }} - {{- toYaml . | nindent 14}} - {{- end }} {{- with .Values.worker.envFrom }} envFrom: {{- toYaml . 
| nindent 14}} {{- end }} + {{- if .Values.worker.ports }} ports: {{- toYaml .Values.worker.ports | nindent 14}} + {{- end }} {{- if .Values.worker.lifecycle }} lifecycle: {{- toYaml .Values.worker.lifecycle | nindent 14 }} @@ -260,8 +299,18 @@ spec: volumes: {{- toYaml .Values.worker.volumes | nindent 10 }} {{- end }} affinity: {{- toYaml .Values.worker.affinity | nindent 10 }} + {{ if .Values.worker.priorityClassName }} + priorityClassName: {{- toYaml .Values.worker.priorityClassName | nindent 10 }} + {{- end }} + {{ if .Values.worker.priority }} + priority: {{- toYaml .Values.worker.priority | nindent 10 }} + {{- end }} tolerations: {{- toYaml .Values.worker.tolerations | nindent 10 }} nodeSelector: {{- toYaml .Values.worker.nodeSelector | nindent 10 }} + {{- with .Values.worker.podSecurityContext}} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} metadata: annotations: {{- toYaml .Values.worker.annotations | nindent 10 }} {{- if .Values.worker.labels }} diff --git a/helm-chart/ray-cluster/values.yaml b/helm-chart/ray-cluster/values.yaml index 7c23e4ccb2e..5755ddc4c08 100644 --- a/helm-chart/ray-cluster/values.yaml +++ b/helm-chart/ray-cluster/values.yaml @@ -20,7 +20,7 @@ imagePullSecrets: [] common: # containerEnv specifies environment variables for the Ray head and worker containers. # Follows standard K8s container env schema. - containerEnv: {} + containerEnv: [] # - name: BLAH # value: VAL head: @@ -56,6 +56,7 @@ head: # Note: From KubeRay v0.6.0, users need to create the ServiceAccount by themselves if they specify the `serviceAccountName` # in the headGroupSpec. See https://github.com/ray-project/kuberay/pull/1128 for more details. serviceAccountName: "" + restartPolicy: "" rayStartParams: dashboard-host: '0.0.0.0' # containerEnv specifies environment variables for the Ray container, @@ -89,6 +90,8 @@ head: nodeSelector: {} tolerations: [] affinity: {} + # Pod security context. + podSecurityContext: {} # Ray container security context. 
securityContext: {} # Optional: The following volumes/volumeMounts configurations are optional but recommended because @@ -113,6 +116,19 @@ head: # annotations: # prometheus.io/scrape: "true" + # Custom pod DNS configuration + # See https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config + # dnsConfig: + # nameservers: + # - 8.8.8.8 + # searches: + # - example.local + # options: + # - name: ndots + # value: "2" + # - name: edns0 + topologySpreadConstraints: {} + worker: # If you want to disable the default workergroup @@ -124,6 +140,7 @@ worker: maxReplicas: 3 labels: {} serviceAccountName: "" + restartPolicy: "" rayStartParams: {} # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. @@ -155,6 +172,8 @@ worker: nodeSelector: {} tolerations: [] affinity: {} + # Pod security context. + podSecurityContext: {} # Ray container security context. securityContext: {} # Optional: The following volumes/volumeMounts configurations are optional but recommended because @@ -172,6 +191,20 @@ worker: # container command for worker Pod. command: [] args: [] + topologySpreadConstraints: {} + + + # Custom pod DNS configuration + # See https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config + # dnsConfig: + # nameservers: + # - 8.8.8.8 + # searches: + # - example.local + # options: + # - name: ndots + # value: "2" + # - name: edns0 # The map's key is used as the groupName. # For example, key:small-group in the map below @@ -185,6 +218,7 @@ additionalWorkerGroups: maxReplicas: 3 labels: {} serviceAccountName: "" + restartPolicy: "" rayStartParams: {} # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. @@ -216,6 +250,8 @@ additionalWorkerGroups: nodeSelector: {} tolerations: [] affinity: {} + # Pod security context. + podSecurityContext: {} # Ray container security context. 
securityContext: {} # Optional: The following volumes/volumeMounts configurations are optional but recommended because @@ -232,6 +268,22 @@ additionalWorkerGroups: command: [] args: [] + # Topology Spread Constraints for worker pods + # See: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ + topologySpreadConstraints: {} + + # Custom pod DNS configuration + # See https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config + # dnsConfig: + # nameservers: + # - 8.8.8.8 + # searches: + # - example.local + # options: + # - name: ndots + # value: "2" + # - name: edns0 + # Configuration for Head's Kubernetes Service service: # This is optional, and the default is ClusterIP. diff --git a/install/prometheus/overrides.yaml b/install/prometheus/overrides.yaml index 37d44b79582..28526d2a89a 100644 --- a/install/prometheus/overrides.yaml +++ b/install/prometheus/overrides.yaml @@ -4044,4 +4044,4 @@ extraManifests: [] # labels: # name: prometheus-extra # data: - # extra-data: "value" \ No newline at end of file + # extra-data: "value" diff --git a/kubectl-plugin/.gitignore b/kubectl-plugin/.gitignore new file mode 100644 index 00000000000..3c5e5524865 --- /dev/null +++ b/kubectl-plugin/.gitignore @@ -0,0 +1,3 @@ +kubectl-ray + +dist/ diff --git a/kubectl-plugin/.goreleaser.yaml b/kubectl-plugin/.goreleaser.yaml new file mode 100644 index 00000000000..4b2fe852d72 --- /dev/null +++ b/kubectl-plugin/.goreleaser.yaml @@ -0,0 +1,42 @@ +version: 2 + +before: + hooks: + - cp ../LICENSE . + - go mod tidy + +builds: + - env: + - CGO_ENABLED=0 + goos: + - linux + - darwin + goarch: + - amd64 + - arm64 + main: ./cmd + binary: kubectl-ray + ldflags: -X github.com/ray-project/kuberay/kubectl-plugin/pkg/cmd/version.Version={{- .Tag -}} + +archives: + - format: tar.gz + # this name template makes the OS and Arch compatible with the results of `uname`. 
+ name_template: >- + {{ .Binary }}_ + {{- .Tag }}_ + {{- .Os }}_ + {{- if eq .Arch "amd64" }}amd64 + {{- else if eq .Arch "386" }}i386 + {{- else }}{{ .Arch }}{{ end }} + {{- if .Arm }}v{{ .Arm }}{{ end }} + # use zip for windows archives + format_overrides: + - goos: windows + format: zip + +changelog: + sort: asc + filters: + exclude: + - "^docs:" + - "^test:" diff --git a/kubectl-plugin/README.md b/kubectl-plugin/README.md new file mode 100644 index 00000000000..cef6329ee0f --- /dev/null +++ b/kubectl-plugin/README.md @@ -0,0 +1,44 @@ +# Kuberay Kubectl Plugin + +Kubectl plugin/extension for Kuberay CLI that provides the ability to manage ray resources. + +## Prerequisites + +1. Make sure there is a Kubernetes cluster running with KubeRay installed. +2. Make sure `kubectl` has the right context. + +## Installation + +You can install the Kuberay kubectl plugin using one of the following methods: + +### Install using Krew kubectl plugin manager (Recommended) + +1. Install [Krew](https://krew.sigs.k8s.io/docs/user-guide/setup/install/). +2. Download the plugin list by running `kubectl krew update`. +3. Install the plugin by running `kubectl krew install ray`. +4. Run `kubectl ray --help` to verify the installation. + +### Download from GitHub releases + +Go to the [releases page](https://github.com/ray-project/kuberay/releases) and download the binary for your platform. + +For example, to install kubectl plugin version 1.2.2 on Linux amd64: + +```bash +curl -LO https://github.com/ray-project/kuberay/releases/download/v1.2.2/kubectl-ray_v1.2.2_linux_amd64.tar.gz +tar -xvf kubectl-ray_v1.2.2_linux_amd64.tar.gz +cp kubectl-ray ~/.local/bin +``` + +Replace `~/.local/bin` with the directory in your `PATH`. + +### Compiling from source + +1. Run `go build cmd/kubectl-ray.go` +2. Move the binary, which will be named `kubectl-ray` to your `PATH` + +## Shell Completion + +1. 
Install [kubectl plugin-completion](https://github.com/marckhouzam/kubectl-plugin_completion) plugin. +2. Run `kubectl plugin-completion generate`. +3. Add `$HOME/.kubectl-plugin-completion` to `PATH` in your shell profile. diff --git a/kubectl-plugin/cmd/kubectl-ray.go b/kubectl-plugin/cmd/kubectl-ray.go new file mode 100644 index 00000000000..fcd6393a93a --- /dev/null +++ b/kubectl-plugin/cmd/kubectl-ray.go @@ -0,0 +1,20 @@ +package main + +import ( + "os" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/cmd" + flag "github.com/spf13/pflag" + "k8s.io/cli-runtime/pkg/genericiooptions" +) + +func main() { + flags := flag.NewFlagSet("kubectl-ray", flag.ExitOnError) + flag.CommandLine = flags + ioStreams := genericiooptions.IOStreams{In: os.Stdin, Out: os.Stdout, ErrOut: os.Stderr} + + root := cmd.NewRayCommand(ioStreams) + if err := root.Execute(); err != nil { + os.Exit(1) + } +} diff --git a/kubectl-plugin/go.mod b/kubectl-plugin/go.mod new file mode 100644 index 00000000000..ae218640b12 --- /dev/null +++ b/kubectl-plugin/go.mod @@ -0,0 +1,107 @@ +module github.com/ray-project/kuberay/kubectl-plugin + +go 1.22.0 + +toolchain go1.22.5 + +require ( + github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 + github.com/onsi/ginkgo/v2 v2.20.2 + github.com/onsi/gomega v1.34.2 + github.com/ray-project/kuberay/ray-operator v0.0.0 + github.com/spf13/cobra v1.8.1 + github.com/spf13/pflag v1.0.5 + github.com/stretchr/testify v1.9.0 + gopkg.in/yaml.v2 v2.4.0 + k8s.io/api v0.31.1 + k8s.io/apimachinery v0.31.1 + k8s.io/cli-runtime v0.31.1 + k8s.io/client-go v0.31.1 + k8s.io/kubectl v0.31.1 + sigs.k8s.io/yaml v1.4.0 +) + +require ( + github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect + github.com/MakeNowJust/heredoc v1.0.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/chai2010/gettext-go v1.0.3 // indirect + 
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect + github.com/fatih/camelcase v1.0.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-errors/errors v1.5.1 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/pprof v0.0.0-20240910150728-a0b0bb1d4134 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/gorilla/websocket v1.5.3 // indirect + github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect + github.com/imdario/mergo v0.3.16 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.17.9 // indirect + github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/mitchellh/go-wordwrap v1.0.1 // indirect + github.com/moby/spdystream v0.5.0 // indirect + github.com/moby/term v0.5.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + 
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect + github.com/peterbourgon/diskv v2.0.1+incompatible // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_golang v1.20.4 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.59.1 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/x448/float16 v0.8.4 // indirect + github.com/xlab/treeprint v1.2.0 // indirect + go.starlark.net v0.0.0-20240725214946-42030a7cedce // indirect + golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect + golang.org/x/net v0.29.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/sys v0.25.0 // indirect + golang.org/x/term v0.24.0 // indirect + golang.org/x/text v0.18.0 // indirect + golang.org/x/time v0.6.0 // indirect + golang.org/x/tools v0.25.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/protobuf v1.34.2 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apiextensions-apiserver v0.31.1 // indirect + k8s.io/component-base v0.31.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 // indirect + k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 // indirect + sigs.k8s.io/controller-runtime v0.19.0 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/kustomize/api v0.17.3 // indirect + sigs.k8s.io/kustomize/kyaml v0.17.2 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // 
indirect +) + +replace github.com/ray-project/kuberay/ray-operator => ../ray-operator diff --git a/kubectl-plugin/go.sum b/kubectl-plugin/go.sum new file mode 100644 index 00000000000..21aa3915ae5 --- /dev/null +++ b/kubectl-plugin/go.sum @@ -0,0 +1,260 @@ +github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= +github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= +github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chai2010/gettext-go v1.0.3 h1:9liNh8t+u26xl5ddmWLmsOsdNLwkdRTg5AG+JnTiM80= +github.com/chai2010/gettext-go v1.0.3/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= +github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= +github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= +github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4= +github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc= +github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8= +github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk= +github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 
h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp 
v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20240910150728-a0b0bb1d4134 h1:c5FlPPgxOn7kJz3VoPLkQYQXGBS3EklQ4Zfi57uOuqQ= +github.com/google/pprof v0.0.0-20240910150728-a0b0bb1d4134/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= +github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA= +github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go 
v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0= +github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= +github.com/lithammer/dedent v1.1.0 h1:VNzHMVCBNG1j0fh3OrsFRkVUwStdDArbgBWoPAffktY= +github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= +github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= +github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU= +github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= +github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= +github.com/moby/term v0.5.0/go.mod 
h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0= +github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= +github.com/onsi/ginkgo/v2 v2.20.2 h1:7NVCeyIWROIAheY21RLS+3j2bb52W0W82tkberYytp4= +github.com/onsi/ginkgo/v2 v2.20.2/go.mod h1:K9gyxPIlb+aIvnZ8bd9Ak+YP18w3APlR+5coaZoE2ag= +github.com/onsi/gomega v1.34.2 h1:pNCwDkzrsv7MS9kpaQvVb1aVLahQXyJ/Tv5oAZMI3i8= +github.com/onsi/gomega v1.34.2/go.mod h1:v1xfxRgk0KIsG+QOdm7p8UosrOzPYRo60fd3B/1Dukc= +github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= +github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= +github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.59.1 h1:LXb1quJHWm1P6wq/U824uxYi4Sg0oGvNeUm1z5dJoX0= +github.com/prometheus/common v0.59.1/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= +github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= +github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.starlark.net v0.0.0-20240725214946-42030a7cedce h1:YyGqCjZtGZJ+mRPaenEiB87afEO2MFRzLiJNZ0Z0bPw= +go.starlark.net v0.0.0-20240725214946-42030a7cedce/go.mod h1:YKMCv9b1WrfWmeqdV5MAuEHWsu5iC+fe6kYl2sQjdI8= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod 
h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys 
v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= +golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.25.0 h1:oFU9pkj/iJgs+0DT+VMHrx+oBKs/LJMV+Uvg78sl+fE= +golang.org/x/tools v0.25.0/go.mod h1:/vtpO8WL1N9cQC3FN5zPqb//fRXskFHbLKk4OW1Q7rg= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU= +k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI= +k8s.io/apiextensions-apiserver v0.31.1 h1:L+hwULvXx+nvTYX/MKM3kKMZyei+UiSXQWciX/N6E40= +k8s.io/apiextensions-apiserver v0.31.1/go.mod h1:tWMPR3sgW+jsl2xm9v7lAyRF1rYEK71i9G5dRtkknoQ= 
+k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U= +k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/cli-runtime v0.31.1 h1:/ZmKhmZ6hNqDM+yf9s3Y4KEYakNXUn5sod2LWGGwCuk= +k8s.io/cli-runtime v0.31.1/go.mod h1:pKv1cDIaq7ehWGuXQ+A//1OIF+7DI+xudXtExMCbe9U= +k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0= +k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg= +k8s.io/component-base v0.31.1 h1:UpOepcrX3rQ3ab5NB6g5iP0tvsgJWzxTyAo20sgYSy8= +k8s.io/component-base v0.31.1/go.mod h1:WGeaw7t/kTsqpVTaCoVEtillbqAhF2/JgvO0LDOMa0w= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 h1:1dWzkmJrrprYvjGwh9kEUxmcUV/CtNU8QM7h1FLWQOo= +k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38/go.mod h1:coRQXBK9NxO98XUv3ZD6AK3xzHCxV6+b7lrquKwaKzA= +k8s.io/kubectl v0.31.1 h1:ih4JQJHxsEggFqDJEHSOdJ69ZxZftgeZvYo7M/cpp24= +k8s.io/kubectl v0.31.1/go.mod h1:aNuQoR43W6MLAtXQ/Bu4GDmoHlbhHKuyD49lmTC8eJM= +k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 h1:b2FmK8YH+QEwq/Sy2uAEhmqL5nPfGYbJOcaqjeYYZoA= +k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.19.0 h1:nWVM7aq+Il2ABxwiCizrVDSlmDcshi9llbaFbC0ji/Q= +sigs.k8s.io/controller-runtime v0.19.0/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/kustomize/api v0.17.3 h1:6GCuHSsxq7fN5yhF2XrC+AAr8gxQwhexgHflOAD/JJU= +sigs.k8s.io/kustomize/api v0.17.3/go.mod h1:TuDH4mdx7jTfK61SQ/j1QZM/QWR+5rmEiNjvYlhzFhc= +sigs.k8s.io/kustomize/kyaml v0.17.2 
h1:+AzvoJUY0kq4QAhH/ydPHHMRLijtUKiyVyh7fOSshr0= +sigs.k8s.io/kustomize/kyaml v0.17.2/go.mod h1:9V0mCjIEYjlXuCdYsSXvyoy2BTsLESH7TlGV81S282U= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/kubectl-plugin/pkg/cmd/create/create.go b/kubectl-plugin/pkg/cmd/create/create.go new file mode 100644 index 00000000000..c3812d24c52 --- /dev/null +++ b/kubectl-plugin/pkg/cmd/create/create.go @@ -0,0 +1,27 @@ +package create + +import ( + "fmt" + "strings" + + "github.com/spf13/cobra" + "k8s.io/cli-runtime/pkg/genericclioptions" +) + +func NewCreateCommand(streams genericclioptions.IOStreams) *cobra.Command { + cmd := &cobra.Command{ + Use: "create", + Short: "Create Ray resources", + Long: `Allow users to create Ray resources. 
And based on input, will generate the necessary files`, + Run: func(cmd *cobra.Command, args []string) { + if len(args) > 0 { + fmt.Println(fmt.Errorf("unknown command(s) %q", strings.Join(args, " "))) + } + cmd.HelpFunc()(cmd, args) + }, + } + + cmd.AddCommand(NewCreateClusterCommand(streams)) + cmd.AddCommand(NewCreateWorkerGroupCommand(streams)) + return cmd +} diff --git a/kubectl-plugin/pkg/cmd/create/create_cluster.go b/kubectl-plugin/pkg/cmd/create/create_cluster.go new file mode 100644 index 00000000000..030bf99a47d --- /dev/null +++ b/kubectl-plugin/pkg/cmd/create/create_cluster.go @@ -0,0 +1,160 @@ +package create + +import ( + "context" + "fmt" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/generation" + "github.com/spf13/cobra" + "k8s.io/cli-runtime/pkg/genericclioptions" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + cmdutil "k8s.io/kubectl/pkg/cmd/util" + "k8s.io/kubectl/pkg/util/templates" +) + +type CreateClusterOptions struct { + configFlags *genericclioptions.ConfigFlags + ioStreams *genericclioptions.IOStreams + clusterName string + rayVersion string + image string + headCPU string + headMemory string + workerCPU string + workerMemory string + workerGPU string + workerReplicas int32 + dryRun bool +} + +var ( + createClusterLong = templates.LongDesc(` + Creates Ray Cluster from inputed file or generate one for user. 
+ `) + + createClusterExample = templates.Examples(` + # Create a Ray cluster using default values + kubectl ray create cluster sample-cluster + + # Creates Ray Cluster from flags input + kubectl ray create cluster sample-cluster --ray-version 2.39.0 --image rayproject/ray:2.39.0 --head-cpu 1 --head-memory 5Gi --worker-replicas 3 --worker-cpu 1 --worker-memory 5Gi + `) +) + +func NewCreateClusterOptions(streams genericclioptions.IOStreams) *CreateClusterOptions { + return &CreateClusterOptions{ + configFlags: genericclioptions.NewConfigFlags(true), + ioStreams: &streams, + } +} + +func NewCreateClusterCommand(streams genericclioptions.IOStreams) *cobra.Command { + options := NewCreateClusterOptions(streams) + cmdFactory := cmdutil.NewFactory(options.configFlags) + + cmd := &cobra.Command{ + Use: "cluster [CLUSTERNAME]", + Short: "Create Ray Cluster resource", + Long: createClusterLong, + Example: createClusterExample, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + if err := options.Complete(cmd, args); err != nil { + return err + } + if err := options.Validate(); err != nil { + return err + } + return options.Run(cmd.Context(), cmdFactory) + }, + } + + cmd.Flags().StringVar(&options.rayVersion, "ray-version", "2.39.0", "Ray Version to use in the Ray Cluster yaml. Default to 2.39.0") + cmd.Flags().StringVar(&options.image, "image", options.image, "Ray image to use in the Ray Cluster yaml") + cmd.Flags().StringVar(&options.headCPU, "head-cpu", "2", "Number of CPU for the ray head. Default to 2") + cmd.Flags().StringVar(&options.headMemory, "head-memory", "4Gi", "Amount of memory to use for the ray head. Default to 4Gi") + cmd.Flags().Int32Var(&options.workerReplicas, "worker-replicas", 1, "Number of the worker group replicas. Default of 1") + cmd.Flags().StringVar(&options.workerCPU, "worker-cpu", "2", "Number of CPU for the ray worker. 
Default to 2") + cmd.Flags().StringVar(&options.workerMemory, "worker-memory", "4Gi", "Amount of memory to use for the ray worker. Default to 4Gi") + cmd.Flags().StringVar(&options.workerGPU, "worker-gpu", "0", "Number of GPU for the ray worker. Default to 0") + cmd.Flags().BoolVar(&options.dryRun, "dry-run", false, "Will not apply the generated cluster and will print out the generated yaml") + + options.configFlags.AddFlags(cmd.Flags()) + return cmd +} + +func (options *CreateClusterOptions) Complete(cmd *cobra.Command, args []string) error { + if *options.configFlags.Namespace == "" { + *options.configFlags.Namespace = "default" + } + + if len(args) != 1 { + return cmdutil.UsageErrorf(cmd, "%s", cmd.Use) + } + options.clusterName = args[0] + + if options.image == "" { + options.image = fmt.Sprintf("rayproject/ray:%s", options.rayVersion) + } + + return nil +} + +func (options *CreateClusterOptions) Validate() error { + config, err := options.configFlags.ToRawKubeConfigLoader().RawConfig() + if err != nil { + return fmt.Errorf("Error retrieving raw config: %w", err) + } + if len(config.CurrentContext) == 0 { + return fmt.Errorf("no context is currently set, use %q to select a new one", "kubectl config use-context ") + } + + return nil +} + +func (options *CreateClusterOptions) Run(ctx context.Context, factory cmdutil.Factory) error { + k8sClient, err := client.NewClient(factory) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + + // Will generate yaml file + rayClusterObject := generation.RayClusterYamlObject{ + Namespace: *options.configFlags.Namespace, + ClusterName: options.clusterName, + RayClusterSpecObject: generation.RayClusterSpecObject{ + RayVersion: options.rayVersion, + Image: options.image, + HeadCPU: options.headCPU, + HeadMemory: options.headMemory, + WorkerReplicas: options.workerReplicas, + WorkerCPU: options.workerCPU, + WorkerMemory: options.workerMemory, + WorkerGPU: options.workerGPU, + }, + } + + rayClusterac := 
rayClusterObject.GenerateRayClusterApplyConfig() + + // If dry run is enabled, it will call the yaml converter and print out the yaml + if options.dryRun { + rayClusterYaml, err := generation.ConvertRayClusterApplyConfigToYaml(rayClusterac) + if err != nil { + return fmt.Errorf("Error when converting RayClusterApplyConfig to yaml: %w", err) + } + fmt.Printf("%s\n", rayClusterYaml) + return nil + } + + // TODO: Decide whether to save yaml to file or not. + + // Applying the YAML + result, err := k8sClient.RayClient().RayV1().RayClusters(*options.configFlags.Namespace).Apply(ctx, rayClusterac, metav1.ApplyOptions{FieldManager: "kubectl-plugin"}) + if err != nil { + return fmt.Errorf("Failed to create Ray Cluster with: %w", err) + } + fmt.Printf("Created Ray Cluster: %s\n", result.GetName()) + return nil +} diff --git a/kubectl-plugin/pkg/cmd/create/create_cluster_test.go b/kubectl-plugin/pkg/cmd/create/create_cluster_test.go new file mode 100644 index 00000000000..03478b20f83 --- /dev/null +++ b/kubectl-plugin/pkg/cmd/create/create_cluster_test.go @@ -0,0 +1,113 @@ +package create + +import ( + "os" + "path/filepath" + "testing" + + "github.com/spf13/cobra" + "github.com/stretchr/testify/assert" + "k8s.io/cli-runtime/pkg/genericclioptions" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/tools/clientcmd/api" +) + +func TestRayCreateClusterComplete(t *testing.T) { + testStreams, _, _, _ := genericclioptions.NewTestIOStreams() + fakeCreateClusterOptions := NewCreateClusterOptions(testStreams) + fakeArgs := []string{"testRayClusterName"} + cmd := &cobra.Command{Use: "cluster"} + + err := fakeCreateClusterOptions.Complete(cmd, fakeArgs) + assert.Nil(t, err) + assert.Equal(t, "default", *fakeCreateClusterOptions.configFlags.Namespace) + assert.Equal(t, "testRayClusterName", fakeCreateClusterOptions.clusterName) +} + +func TestRayCreateClusterValidate(t *testing.T) { + testStreams, _, _, _ := genericclioptions.NewTestIOStreams() + + testNS, testContext, testBT, 
testImpersonate := "test-namespace", "test-contet", "test-bearer-token", "test-person" + + // Fake directory for kubeconfig + fakeDir, err := os.MkdirTemp("", "fake-dir") + assert.Nil(t, err) + defer os.RemoveAll(fakeDir) + + // Set up fake config for kubeconfig + config := &api.Config{ + Clusters: map[string]*api.Cluster{ + "test-cluster": { + Server: "https://fake-kubernetes-cluster.example.com", + InsecureSkipTLSVerify: true, // For testing purposes + }, + }, + Contexts: map[string]*api.Context{ + "my-fake-context": { + Cluster: "my-fake-cluster", + AuthInfo: "my-fake-user", + }, + }, + CurrentContext: "my-fake-context", + AuthInfos: map[string]*api.AuthInfo{ + "my-fake-user": { + Token: "", // Empty for testing without authentication + }, + }, + } + + fakeFile := filepath.Join(fakeDir, ".kubeconfig") + + err = clientcmd.WriteToFile(*config, fakeFile) + assert.Nil(t, err) + + fakeConfigFlags := &genericclioptions.ConfigFlags{ + Namespace: &testNS, + Context: &testContext, + KubeConfig: &fakeFile, + BearerToken: &testBT, + Impersonate: &testImpersonate, + ImpersonateGroup: &[]string{"fake-group"}, + } + + tests := []struct { + name string + opts *CreateClusterOptions + expectError string + }{ + { + name: "Test validation when no context is set", + opts: &CreateClusterOptions{ + configFlags: genericclioptions.NewConfigFlags(false), + ioStreams: &testStreams, + }, + expectError: "no context is currently set, use \"kubectl config use-context \" to select a new one", + }, + { + name: "Successful submit job validation with RayJob", + opts: &CreateClusterOptions{ + configFlags: fakeConfigFlags, + ioStreams: &testStreams, + clusterName: "fakeclustername", + rayVersion: "ray-version", + image: "ray-image", + headCPU: "5", + headMemory: "5Gi", + workerReplicas: 3, + workerCPU: "4", + workerMemory: "5Gi", + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := tc.opts.Validate() + if tc.expectError != "" { + assert.Equal(t, 
tc.expectError, err.Error()) + } else { + assert.Nil(t, err) + } + }) + } +} diff --git a/kubectl-plugin/pkg/cmd/create/create_workergroup.go b/kubectl-plugin/pkg/cmd/create/create_workergroup.go new file mode 100644 index 00000000000..142b50073df --- /dev/null +++ b/kubectl-plugin/pkg/cmd/create/create_workergroup.go @@ -0,0 +1,177 @@ +package create + +import ( + "context" + "fmt" + + "github.com/spf13/cobra" + "k8s.io/cli-runtime/pkg/genericclioptions" + "k8s.io/client-go/rest" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + cmdutil "k8s.io/kubectl/pkg/cmd/util" + "k8s.io/kubectl/pkg/util/templates" +) + +type CreateWorkerGroupOptions struct { + configFlags *genericclioptions.ConfigFlags + ioStreams *genericclioptions.IOStreams + clusterName string + groupName string + rayVersion string + image string + workerCPU string + workerGPU string + workerMemory string + workerReplicas int32 + workerMinReplicas int32 + workerMaxReplicas int32 +} + +var ( + createWorkerGroupLong = templates.LongDesc(` + Adds a worker group to an existing RayCluster. 
+ `) + + createWorkerGroupExample = templates.Examples(` + # Create a worker group in an existing RayCluster + kubectl ray create workergroup example-group --ray-cluster sample-cluster --image rayproject/ray:2.39.0 --worker-cpu=2 --worker-memory=5Gi + `) +) + +func NewCreateWorkerGroupOptions(streams genericclioptions.IOStreams) *CreateWorkerGroupOptions { + return &CreateWorkerGroupOptions{ + configFlags: genericclioptions.NewConfigFlags(true), + ioStreams: &streams, + } +} + +func NewCreateWorkerGroupCommand(streams genericclioptions.IOStreams) *cobra.Command { + options := NewCreateWorkerGroupOptions(streams) + cmdFactory := cmdutil.NewFactory(options.configFlags) + // Silence warnings to avoid messages like 'unknown field "spec.headGroupSpec.template.metadata.creationTimestamp"' + // See https://github.com/kubernetes/kubernetes/issues/67610 for more details. + rest.SetDefaultWarningHandler(rest.NoWarnings{}) + + cmd := &cobra.Command{ + Use: "workergroup [WORKERGROUP]", + Short: "Create worker group in an existing RayCluster", + Long: createWorkerGroupLong, + Example: createWorkerGroupExample, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + if err := options.Complete(cmd, args); err != nil { + return err + } + if err := options.Validate(); err != nil { + return err + } + return options.Run(cmd.Context(), cmdFactory) + }, + } + + cmd.Flags().StringVar(&options.clusterName, "ray-cluster", "", "The name of the RayCluster to add a worker group.") + cmd.Flags().StringVar(&options.rayVersion, "ray-version", "2.39.0", "Ray Version to use in the Ray Cluster yaml. Default to 2.39.0") + cmd.Flags().StringVar(&options.image, "image", options.image, "Ray image to use in the Ray Cluster yaml") + cmd.Flags().Int32Var(&options.workerReplicas, "worker-replicas", 1, "Number of the worker group replicas. Default of 1") + cmd.Flags().Int32Var(&options.workerMinReplicas, "worker-min-replicas", 1, "Minimum number of the worker group replicas.
Default of 1") + cmd.Flags().Int32Var(&options.workerMaxReplicas, "worker-max-replicas", 10, "Maximum number of the worker group replicas. Default of 10") + cmd.Flags().StringVar(&options.workerCPU, "worker-cpu", "2", "Number of CPU for the ray worker. Default to 2") + cmd.Flags().StringVar(&options.workerGPU, "worker-gpu", "0", "Number of GPU for the ray worker. Default to 0") + cmd.Flags().StringVar(&options.workerMemory, "worker-memory", "4Gi", "Amount of memory to use for the ray worker. Default to 4Gi") + + options.configFlags.AddFlags(cmd.Flags()) + return cmd +} + +func (options *CreateWorkerGroupOptions) Complete(cmd *cobra.Command, args []string) error { + if *options.configFlags.Namespace == "" { + *options.configFlags.Namespace = "default" + } + + if len(args) != 1 { + return cmdutil.UsageErrorf(cmd, "%s", cmd.Use) + } + options.groupName = args[0] + + if options.image == "" { + options.image = fmt.Sprintf("rayproject/ray:%s", options.rayVersion) + } + + return nil +} + +func (options *CreateWorkerGroupOptions) Validate() error { + config, err := options.configFlags.ToRawKubeConfigLoader().RawConfig() + if err != nil { + return fmt.Errorf("Error retrieving raw config: %w", err) + } + if len(config.CurrentContext) == 0 { + return fmt.Errorf("no context is currently set, use %q to select a new one", "kubectl config use-context ") + } + + return nil +} + +func (options *CreateWorkerGroupOptions) Run(ctx context.Context, factory cmdutil.Factory) error { + k8sClient, err := client.NewClient(factory) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + + rayCluster, err := k8sClient.RayClient().RayV1().RayClusters(*options.configFlags.Namespace).Get(ctx, options.clusterName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("error getting RayCluster: %w", err) + } + + newRayCluster := rayCluster.DeepCopy() + podTemplate := corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "ray-worker", +
Image: options.image, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(options.workerCPU), + corev1.ResourceMemory: resource.MustParse(options.workerMemory), + }, + Limits: corev1.ResourceList{ + corev1.ResourceMemory: resource.MustParse(options.workerMemory), + }, + }, + }, + }, + }, + } + + gpuResource := resource.MustParse(options.workerGPU) + if !gpuResource.IsZero() { + podTemplate.Spec.Containers[0].Resources.Requests[corev1.ResourceName("nvidia.com/gpu")] = gpuResource + podTemplate.Spec.Containers[0].Resources.Limits[corev1.ResourceName("nvidia.com/gpu")] = gpuResource + } + + workerGroup := rayv1.WorkerGroupSpec{ + GroupName: options.groupName, + Replicas: &options.workerReplicas, + MinReplicas: &options.workerMinReplicas, + MaxReplicas: &options.workerMaxReplicas, + RayStartParams: map[string]string{}, + Template: podTemplate, + } + newRayCluster.Spec.WorkerGroupSpecs = append(newRayCluster.Spec.WorkerGroupSpecs, workerGroup) + + newRayCluster, err = k8sClient.RayClient().RayV1().RayClusters(*options.configFlags.Namespace).Update(ctx, newRayCluster, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("error updating RayCluster with new worker group: %w", err) + } + + fmt.Printf("Updated RayCluster %s/%s with new worker group\n", newRayCluster.Namespace, newRayCluster.Name) + return nil +} diff --git a/kubectl-plugin/pkg/cmd/delete/delete.go b/kubectl-plugin/pkg/cmd/delete/delete.go new file mode 100644 index 00000000000..0a4a87e1edc --- /dev/null +++ b/kubectl-plugin/pkg/cmd/delete/delete.go @@ -0,0 +1,169 @@ +package kubectlraydelete + +import ( + "bufio" + "context" + "fmt" + "os" + "strings" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util" + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client" + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/completion" + "github.com/spf13/cobra" + "k8s.io/cli-runtime/pkg/genericclioptions" + 
"k8s.io/cli-runtime/pkg/genericiooptions" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + cmdutil "k8s.io/kubectl/pkg/cmd/util" + "k8s.io/kubectl/pkg/util/templates" +) + +type DeleteOptions struct { + configFlags *genericclioptions.ConfigFlags + ioStreams *genericiooptions.IOStreams + ResourceType util.ResourceType + ResourceName string + Namespace string +} + +var deleteExample = templates.Examples(` + # Delete RayCluster + kubectl ray delete sample-raycluster + + # Delete RayCluster with specificed ray resource + kubectl ray delete raycluster/sample-raycluster + + # Delete RayJob + kubectl ray delete rayjob/sample-rayjob + + # Delete RayService + kubectl ray delete rayservice/sample-rayservice + `) + +func NewDeleteOptions(streams genericiooptions.IOStreams) *DeleteOptions { + configFlags := genericclioptions.NewConfigFlags(true) + return &DeleteOptions{ + ioStreams: &streams, + configFlags: configFlags, + } +} + +func NewDeleteCommand(streams genericclioptions.IOStreams) *cobra.Command { + options := NewDeleteOptions(streams) + factory := cmdutil.NewFactory(options.configFlags) + + cmd := &cobra.Command{ + Use: "delete (RAYCLUSTER | TYPE/NAME)", + Short: "Delete Ray resoruce.", + Example: deleteExample, + Long: `Deletes Ray custom resources such as RayCluster, RayService, or RayJob`, + ValidArgsFunction: completion.RayClusterResourceNameCompletionFunc(factory), + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + if err := options.Complete(cmd, args); err != nil { + return err + } + if err := options.Validate(); err != nil { + return err + } + return options.Run(cmd.Context(), factory) + }, + } + + options.configFlags.AddFlags(cmd.Flags()) + return cmd +} + +func (options *DeleteOptions) Complete(cmd *cobra.Command, args []string) error { + if len(args) != 1 { + return cmdutil.UsageErrorf(cmd, "%s", cmd.Use) + } + + if *options.configFlags.Namespace == "" { + options.Namespace = "default" + } else { + options.Namespace = 
*options.configFlags.Namespace + } + + typeAndName := strings.Split(args[0], "/") + if len(typeAndName) == 1 { + options.ResourceType = util.RayCluster + options.ResourceName = typeAndName[0] + } else { + if len(typeAndName) != 2 || typeAndName[1] == "" { + return cmdutil.UsageErrorf(cmd, "invalid resource type/name: %s", args[0]) + } + + switch strings.ToLower(typeAndName[0]) { + case string(util.RayCluster): + options.ResourceType = util.RayCluster + case string(util.RayJob): + options.ResourceType = util.RayJob + case string(util.RayService): + options.ResourceType = util.RayService + default: + return cmdutil.UsageErrorf(cmd, "unsupported resource type: %s", args[0]) + } + + options.ResourceName = typeAndName[1] + } + + return nil +} + +func (options *DeleteOptions) Validate() error { + // Overrides and binds the kube config then retrieves the merged result + config, err := options.configFlags.ToRawKubeConfigLoader().RawConfig() + if err != nil { + return fmt.Errorf("Error retrieving raw config: %w", err) + } + if len(config.CurrentContext) == 0 { + return fmt.Errorf("no context is currently set, use %q to select a new one", "kubectl config use-context ") + } + return nil +} + +func (options *DeleteOptions) Run(ctx context.Context, factory cmdutil.Factory) error { + k8sClient, err := client.NewClient(factory) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + + // Ask user for confirmation + reader := bufio.NewReader(os.Stdin) + fmt.Printf("Are you sure you want to delete %s %s? 
(y/yes/n/no) ", options.ResourceType, options.ResourceName) + confirmation, err := reader.ReadString('\n') + if err != nil { + return fmt.Errorf("Failed to read user input: %w", err) + } + + switch strings.ToLower(strings.TrimSpace(confirmation)) { + case "y", "yes": + case "n", "no": + fmt.Printf("Canceled deletion.\n") + return nil + default: + fmt.Printf("Unknown input %s\n", confirmation) + return nil + } + + // Delete the Ray Resources + switch options.ResourceType { + case util.RayCluster: + err = k8sClient.RayClient().RayV1().RayClusters(options.Namespace).Delete(ctx, options.ResourceName, metav1.DeleteOptions{}) + case util.RayJob: + err = k8sClient.RayClient().RayV1().RayJobs(options.Namespace).Delete(ctx, options.ResourceName, metav1.DeleteOptions{}) + case util.RayService: + err = k8sClient.RayClient().RayV1().RayServices(options.Namespace).Delete(ctx, options.ResourceName, metav1.DeleteOptions{}) + default: + err = fmt.Errorf("unknown/unsupported resource type: %s", options.ResourceType) + } + + if err != nil { + return fmt.Errorf("Failed to delete %s/%s: %w", options.ResourceType, options.ResourceName, err) + } + + fmt.Printf("Delete %s %s\n", options.ResourceType, options.ResourceName) + return nil +} diff --git a/kubectl-plugin/pkg/cmd/delete/delete_test.go b/kubectl-plugin/pkg/cmd/delete/delete_test.go new file mode 100644 index 00000000000..c1dae0b590a --- /dev/null +++ b/kubectl-plugin/pkg/cmd/delete/delete_test.go @@ -0,0 +1,112 @@ +package kubectlraydelete + +import ( + "testing" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util" + "github.com/spf13/cobra" + "github.com/stretchr/testify/assert" + "k8s.io/cli-runtime/pkg/genericclioptions" +) + +func TestComplete(t *testing.T) { + cmd := &cobra.Command{Use: "deleete"} + + tests := []struct { + name string + namespace string + expectedResourceType util.ResourceType + expectedNamespace string + expectedName string + args []string + hasErr bool + }{ + { + name: "valid raycluster without 
explicit resource and without namespace", + namespace: "", + expectedResourceType: util.RayCluster, + expectedNamespace: "default", + expectedName: "test-raycluster", + args: []string{"test-raycluster"}, + hasErr: false, + }, + { + name: "valid raycluster with explicit resource and with namespace", + namespace: "test-namespace", + expectedResourceType: util.RayCluster, + expectedNamespace: "test-namespace", + expectedName: "test-raycluster", + args: []string{"raycluster/test-raycluster"}, + hasErr: false, + }, + { + name: "valid raycluster without explicit resource and with namespace", + namespace: "test-namespace", + expectedResourceType: util.RayCluster, + expectedNamespace: "test-namespace", + expectedName: "test-raycluster", + args: []string{"test-raycluster"}, + hasErr: false, + }, + { + name: "valid rayjob with namespace", + namespace: "test-namespace", + expectedResourceType: util.RayJob, + expectedNamespace: "test-namespace", + expectedName: "test-rayjob", + args: []string{"rayjob/test-rayjob"}, + hasErr: false, + }, + { + name: "valid rayservice with namespace", + namespace: "test-namespace", + expectedResourceType: util.RayService, + expectedNamespace: "test-namespace", + expectedName: "test-rayservice", + args: []string{"rayservice/test-rayservice"}, + hasErr: false, + }, + { + name: "invalid service type", + namespace: "test-namespace", + args: []string{"rayserve/test-rayserve"}, + hasErr: true, + }, + { + name: "valid raycluster with namespace but weird ray type casing", + namespace: "test-namespace", + expectedResourceType: util.RayCluster, + expectedNamespace: "test-namespace", + expectedName: "test-raycluster", + args: []string{"rayCluStER/test-raycluster"}, + hasErr: false, + }, + { + name: "invalid args, too many args", + args: []string{"test", "raytype", "raytypename"}, + hasErr: true, + }, + { + name: "invalid args, non valid resource type", + args: []string{"test/test"}, + hasErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, 
func(t *testing.T) { + testStreams, _, _, _ := genericclioptions.NewTestIOStreams() + fakeDeleteOptions := NewDeleteOptions(testStreams) + fakeDeleteOptions.configFlags.Namespace = &tc.namespace + err := fakeDeleteOptions.Complete(cmd, tc.args) + if tc.hasErr { + assert.NotNil(t, err) + } else { + assert.Nil(t, err) + assert.Equal(t, tc.expectedName, fakeDeleteOptions.ResourceName) + assert.Equal(t, tc.expectedNamespace, fakeDeleteOptions.Namespace) + assert.Equal(t, tc.expectedResourceType, fakeDeleteOptions.ResourceType) + } + }) + } +} diff --git a/kubectl-plugin/pkg/cmd/get/get.go b/kubectl-plugin/pkg/cmd/get/get.go new file mode 100644 index 00000000000..5b0af5a16ce --- /dev/null +++ b/kubectl-plugin/pkg/cmd/get/get.go @@ -0,0 +1,28 @@ +package get + +import ( + "fmt" + "strings" + + "github.com/spf13/cobra" + "k8s.io/cli-runtime/pkg/genericclioptions" +) + +func NewGetCommand(streams genericclioptions.IOStreams) *cobra.Command { + cmd := &cobra.Command{ + Use: "get", + Short: "Display one or many Ray resources.", + Long: `Prints a table of the most important information about the specified Ray resources.`, + Aliases: []string{"list"}, + SilenceUsage: true, + Run: func(cmd *cobra.Command, args []string) { + if len(args) > 0 { + fmt.Println(fmt.Errorf("unknown command(s) %q", strings.Join(args, " "))) + } + cmd.HelpFunc()(cmd, args) + }, + } + + cmd.AddCommand(NewGetClusterCommand(streams)) + return cmd +} diff --git a/kubectl-plugin/pkg/cmd/get/get_cluster.go b/kubectl-plugin/pkg/cmd/get/get_cluster.go new file mode 100644 index 00000000000..ffae1895d47 --- /dev/null +++ b/kubectl-plugin/pkg/cmd/get/get_cluster.go @@ -0,0 +1,153 @@ +package get + +import ( + "context" + "fmt" + "io" + "time" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client" + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/completion" + "github.com/spf13/cobra" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/duration" + 
"k8s.io/cli-runtime/pkg/genericclioptions" + "k8s.io/cli-runtime/pkg/printers" + cmdutil "k8s.io/kubectl/pkg/cmd/util" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +type GetClusterOptions struct { + configFlags *genericclioptions.ConfigFlags + ioStreams *genericclioptions.IOStreams + args []string + AllNamespaces bool +} + +func NewGetClusterOptions(streams genericclioptions.IOStreams) *GetClusterOptions { + return &GetClusterOptions{ + configFlags: genericclioptions.NewConfigFlags(true), + ioStreams: &streams, + } +} + +func NewGetClusterCommand(streams genericclioptions.IOStreams) *cobra.Command { + options := NewGetClusterOptions(streams) + // Initialize the factory for later use with the current config flag + cmdFactory := cmdutil.NewFactory(options.configFlags) + + cmd := &cobra.Command{ + Use: "cluster [NAME]", + Short: "Get cluster information.", + SilenceUsage: true, + ValidArgsFunction: completion.RayClusterCompletionFunc(cmdFactory), + RunE: func(cmd *cobra.Command, args []string) error { + if err := options.Complete(args); err != nil { + return err + } + if err := options.Validate(); err != nil { + return err + } + // running cmd.Execute or cmd.ExecuteE sets the context, which will be done by root + k8sClient, err := client.NewClient(cmdFactory) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + return options.Run(cmd.Context(), k8sClient) + }, + } + cmd.Flags().BoolVarP(&options.AllNamespaces, "all-namespaces", "A", options.AllNamespaces, "If present, list the requested clusters across all namespaces. 
Namespace in current context is ignored even if specified with --namespace.") + options.configFlags.AddFlags(cmd.Flags()) + return cmd +} + +func (options *GetClusterOptions) Complete(args []string) error { + if *options.configFlags.Namespace == "" { + options.AllNamespaces = true + } + + options.args = args + return nil +} + +func (options *GetClusterOptions) Validate() error { + // Overrides and binds the kube config then retrieves the merged result + config, err := options.configFlags.ToRawKubeConfigLoader().RawConfig() + if err != nil { + return fmt.Errorf("Error retrieving raw config: %w", err) + } + if len(config.CurrentContext) == 0 { + return fmt.Errorf("no context is currently set, use %q to select a new one", "kubectl config use-context ") + } + if len(options.args) > 1 { + return fmt.Errorf("too many arguments, either one or no arguments are allowed") + } + return nil +} + +func (options *GetClusterOptions) Run(ctx context.Context, k8sClient client.Client) error { + var err error + var rayclusterList *rayv1.RayClusterList + + listopts := v1.ListOptions{} + if len(options.args) == 1 { + listopts = v1.ListOptions{ + FieldSelector: fmt.Sprintf("metadata.name=%s", options.args[0]), + } + } + + if options.AllNamespaces { + rayclusterList, err = k8sClient.RayClient().RayV1().RayClusters("").List(ctx, listopts) + if err != nil { + return fmt.Errorf("unable to retrieve raycluster for all namespaces: %w", err) + } + } else { + rayclusterList, err = k8sClient.RayClient().RayV1().RayClusters(*options.configFlags.Namespace).List(ctx, listopts) + if err != nil { + return fmt.Errorf("unable to retrieve raycluster for namespace %s: %w", *options.configFlags.Namespace, err) + } + } + + return printClusters(rayclusterList, options.ioStreams.Out) +} + +func printClusters(rayclusterList *rayv1.RayClusterList, output io.Writer) error { + resultTablePrinter := printers.NewTablePrinter(printers.PrintOptions{}) + + resTable := &v1.Table{ + ColumnDefinitions: 
[]v1.TableColumnDefinition{ + {Name: "Name", Type: "string"}, + {Name: "Namespace", Type: "string"}, + {Name: "Desired Workers", Type: "string"}, + {Name: "Available Workers", Type: "string"}, + {Name: "CPUs", Type: "string"}, + {Name: "GPUs", Type: "string"}, + {Name: "TPUs", Type: "string"}, + {Name: "Memory", Type: "string"}, + {Name: "Age", Type: "string"}, + }, + } + + for _, raycluster := range rayclusterList.Items { + age := duration.HumanDuration(time.Since(raycluster.GetCreationTimestamp().Time)) + if raycluster.GetCreationTimestamp().Time.IsZero() { + age = "" + } + resTable.Rows = append(resTable.Rows, v1.TableRow{ + Cells: []interface{}{ + raycluster.GetName(), + raycluster.GetNamespace(), + raycluster.Status.DesiredWorkerReplicas, + raycluster.Status.AvailableWorkerReplicas, + raycluster.Status.DesiredCPU.String(), + raycluster.Status.DesiredGPU.String(), + raycluster.Status.DesiredTPU.String(), + raycluster.Status.DesiredMemory.String(), + age, + }, + }) + } + + return resultTablePrinter.PrintObj(resTable, output) +} diff --git a/kubectl-plugin/pkg/cmd/get/get_cluster_test.go b/kubectl-plugin/pkg/cmd/get/get_cluster_test.go new file mode 100644 index 00000000000..79c8623f783 --- /dev/null +++ b/kubectl-plugin/pkg/cmd/get/get_cluster_test.go @@ -0,0 +1,217 @@ +package get + +import ( + "bytes" + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/api/resource" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/cli-runtime/pkg/genericclioptions" + "k8s.io/cli-runtime/pkg/printers" + kubefake "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/tools/clientcmd/api" + cmdtesting "k8s.io/kubectl/pkg/cmd/testing" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayClientFake "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/fake" +) + +// 
This is to test Complete() and ensure that it is setting the namespace and arguments correctly +// No validation test is done here +func TestRayClusterGetComplete(t *testing.T) { + // Initialize members of the cluster get option struct and the struct itself + testStreams, _, _, _ := genericclioptions.NewTestIOStreams() + fakeClusterGetOptions := NewGetClusterOptions(testStreams) + fakeArgs := []string{"Expected", "output"} + + *fakeClusterGetOptions.configFlags.Namespace = "" + fakeClusterGetOptions.AllNamespaces = false + + err := fakeClusterGetOptions.Complete(fakeArgs) + assert.Nil(t, err) + + assert.True(t, fakeClusterGetOptions.AllNamespaces) + assert.Equal(t, fakeClusterGetOptions.args, fakeArgs) +} + +// Test the Validation() step of the command. +func TestRayClusterGetValidate(t *testing.T) { + testStreams, _, _, _ := genericclioptions.NewTestIOStreams() + + testNS, testContext, testBT, testImpersonate := "test-namespace", "test-context", "test-bearer-token", "test-person" + + // Fake directory for kubeconfig + fakeDir, err := os.MkdirTemp("", "fake-config") + assert.Nil(t, err) + defer os.RemoveAll(fakeDir) + + // Set up fake config for kubeconfig + config := &api.Config{ + Clusters: map[string]*api.Cluster{ + "test-cluster": { + Server: "https://fake-kubernetes-cluster.example.com", + InsecureSkipTLSVerify: true, // For testing purposes + }, + }, + Contexts: map[string]*api.Context{ + "my-fake-context": { + Cluster: "my-fake-cluster", + AuthInfo: "my-fake-user", + }, + }, + CurrentContext: "my-fake-context", + AuthInfos: map[string]*api.AuthInfo{ + "my-fake-user": { + Token: "", // Empty for testing without authentication + }, + }, + } + + fakeFile := filepath.Join(fakeDir, ".kubeconfig") + + err = clientcmd.WriteToFile(*config, fakeFile) + assert.Nil(t, err) + + // Initialize the fake config flag with the fake kubeconfig and values + fakeConfigFlags := &genericclioptions.ConfigFlags{ + Namespace: &testNS, + Context: &testContext, + KubeConfig: &fakeFile, 
+ BearerToken: &testBT, + Impersonate: &testImpersonate, + ImpersonateGroup: &[]string{"fake-group"}, + } + + tests := []struct { + name string + opts *GetClusterOptions + expect string + expectError string + }{ + { + name: "Test validation when no context is set", + opts: &GetClusterOptions{ + configFlags: genericclioptions.NewConfigFlags(false), + AllNamespaces: false, + args: []string{"random_arg"}, + ioStreams: &testStreams, + }, + expectError: "no context is currently set, use \"kubectl config use-context \" to select a new one", + }, + { + name: "Test validation when more than 1 arg", + opts: &GetClusterOptions{ + // Use fake config to bypass the config flag checks + configFlags: fakeConfigFlags, + AllNamespaces: false, + args: []string{"fake", "args"}, + ioStreams: &testStreams, + }, + expectError: "too many arguments, either one or no arguments are allowed", + }, + { + name: "Successful validation call", + opts: &GetClusterOptions{ + // Use fake config to bypass the config flag checks + configFlags: fakeConfigFlags, + AllNamespaces: false, + args: []string{"random_arg"}, + ioStreams: &testStreams, + }, + expectError: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := tc.opts.Validate() + if tc.expectError != "" { + assert.Error(t, err) + assert.Equal(t, tc.expectError, err.Error()) + } else { + assert.NoError(t, err) + } + }) + } +} + +// Tests the Run() step of the command and ensure that the output is as expected. 
+func TestRayClusterGetRun(t *testing.T) { + tf := cmdtesting.NewTestFactory().WithNamespace("test") + defer tf.Cleanup() + + testStreams, _, resBuf, _ := genericclioptions.NewTestIOStreams() + + fakeClusterGetOptions := NewGetClusterOptions(testStreams) + + rayCluster := &rayv1.RayCluster{ + ObjectMeta: v1.ObjectMeta{ + Name: "raycluster-kuberay", + Namespace: "test", + }, + Status: rayv1.RayClusterStatus{ + DesiredWorkerReplicas: 2, + AvailableWorkerReplicas: 2, + DesiredCPU: resource.MustParse("6"), + DesiredGPU: resource.MustParse("1"), + DesiredTPU: resource.MustParse("1"), + DesiredMemory: resource.MustParse("24Gi"), + State: rayv1.Ready, + }, + } + + kubeClientSet := kubefake.NewClientset() + rayClient := rayClientFake.NewSimpleClientset(rayCluster) + k8sClients := client.NewClientForTesting(kubeClientSet, rayClient) + + // Initialize the printer with an empty print options since we are setting the column definition later + expectedTestResultTable := printers.NewTablePrinter(printers.PrintOptions{}) + + // Define the column names and types + testResTable := &v1.Table{ + ColumnDefinitions: []v1.TableColumnDefinition{ + {Name: "Name", Type: "string"}, + {Name: "Namespace", Type: "string"}, + {Name: "Desired Workers", Type: "string"}, + {Name: "Available Workers", Type: "string"}, + {Name: "CPUs", Type: "string"}, + {Name: "GPUs", Type: "string"}, + {Name: "TPUs", Type: "string"}, + {Name: "Memory", Type: "string"}, + {Name: "Age", Type: "string"}, + }, + } + + testResTable.Rows = append(testResTable.Rows, v1.TableRow{ + Cells: []interface{}{ + "raycluster-kuberay", + "test", + "2", + "2", + "6", + "1", + "1", + "24Gi", + "", + }, + }) + + // Result buffer for the expected table result + var resbuffer bytes.Buffer + err := expectedTestResultTable.PrintObj(testResTable, &resbuffer) + assert.Nil(t, err) + + err = fakeClusterGetOptions.Run(context.Background(), k8sClients) + assert.Nil(t, err) + + if e, a := resbuffer.String(), resBuf.String(); e != a { + 
t.Errorf("\nexpected\n%v\ngot\n%v", e, a) + } +} diff --git a/kubectl-plugin/pkg/cmd/job/job.go b/kubectl-plugin/pkg/cmd/job/job.go new file mode 100644 index 00000000000..4734615ebdb --- /dev/null +++ b/kubectl-plugin/pkg/cmd/job/job.go @@ -0,0 +1,20 @@ +package job + +import ( + "github.com/spf13/cobra" + "k8s.io/cli-runtime/pkg/genericclioptions" +) + +func NewJobCommand(streams genericclioptions.IOStreams) *cobra.Command { + cmd := &cobra.Command{ + Use: "job", + Short: "submit ray job", + SilenceUsage: true, + Run: func(cmd *cobra.Command, args []string) { + cmd.HelpFunc()(cmd, args) + }, + } + + cmd.AddCommand(NewJobSubmitCommand(streams)) + return cmd +} diff --git a/kubectl-plugin/pkg/cmd/job/job_submit.go b/kubectl-plugin/pkg/cmd/job/job_submit.go new file mode 100644 index 00000000000..ff48e60d4b1 --- /dev/null +++ b/kubectl-plugin/pkg/cmd/job/job_submit.go @@ -0,0 +1,586 @@ +package job + +import ( + "bufio" + "context" + "fmt" + "log" + "net/http" + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" + "time" + + "k8s.io/apimachinery/pkg/api/meta" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/cli-runtime/pkg/genericclioptions" + "k8s.io/cli-runtime/pkg/genericiooptions" + "k8s.io/kubectl/pkg/cmd/portforward" + cmdutil "k8s.io/kubectl/pkg/cmd/util" + "k8s.io/kubectl/pkg/util/templates" + "sigs.k8s.io/yaml" + + "github.com/google/shlex" + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util" + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client" + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/generation" + "github.com/spf13/cobra" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayscheme "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme" +) + +const ( + dashboardAddr = "http://localhost:8265" + clusterTimeout = 120.0 + portforwardtimeout = 60.0 +) + +type SubmitJobOptions struct { + ioStreams *genericiooptions.IOStreams + configFlags *genericclioptions.ConfigFlags 
+ RayJob *rayv1.RayJob + submissionID string + entryPoint string + fileName string + workingDir string + runtimeEnv string + headers string + verify string + cluster string + runtimeEnvJson string + entryPointResource string + metadataJson string + logStyle string + logColor string + rayjobName string + rayVersion string + image string + headCPU string + headMemory string + workerCPU string + workerMemory string + entryPointCPU float32 + entryPointGPU float32 + entryPointMemory int + workerReplicas int32 + noWait bool + dryRun bool +} + +var ( + jobSubmitLong = templates.LongDesc(` + Submit ray job to ray cluster as one would using ray CLI e.g. 'ray job submit ENTRYPOINT'. Command supports all options that 'ray job submit' supports, except '--address'. + If RayCluster is already setup, use 'kubectl ray session' instead. + + If no rayjob yaml file is specified, the command will create a default rayjob for the user. + + Command will apply RayJob CR and also submit the ray job. RayJob CR is required. 
+ `) + + jobSubmitExample = templates.Examples(` + # Submit ray job with working-directory + kubectl ray job submit -f rayjob.yaml --working-dir /path/to/working-dir/ -- python my_script.py + + # Submit ray job with runtime Env file and working directory + kubectl ray job submit -f rayjob.yaml --working-dir /path/to/working-dir/ --runtime-env /runtimeEnv.yaml -- python my_script.py + + # Submit ray job with runtime Env file assuming runtime-env has working_dir set + kubectl ray job submit -f rayjob.yaml --runtime-env path/to/runtimeEnv.yaml -- python my_script.py + + # Submit generated ray job with default values and with runtime Env file and working directory + kubectl ray job submit --name rayjob-sample --working-dir /path/to/working-dir/ --runtime-env /runtimeEnv.yaml -- python my_script.py + + # Generate ray job with specifications and submit ray job with runtime Env file and working directory + kubectl ray job submit --name rayjob-sample --ray-version 2.39.0 --image rayproject/ray:2.39.0 --head-cpu 1 --head-memory 5Gi --worker-replicas 3 --worker-cpu 1 --worker-memory 5Gi --runtime-env path/to/runtimeEnv.yaml -- python my_script.py + + # Generate ray job with specifications and print out the generated rayjob in yaml format + kubectl ray job submit --dry-run --name rayjob-sample --ray-version 2.39.0 --image rayproject/ray:2.39.0 --head-cpu 1 --head-memory 5Gi --worker-replicas 3 --worker-cpu 1 --worker-memory 5Gi --runtime-env path/to/runtimeEnv.yaml -- python my_script.py + `) +) + +func NewJobSubmitOptions(streams genericiooptions.IOStreams) *SubmitJobOptions { + return &SubmitJobOptions{ + ioStreams: &streams, + configFlags: genericclioptions.NewConfigFlags(true), + } +} + +func NewJobSubmitCommand(streams genericclioptions.IOStreams) *cobra.Command { + options := NewJobSubmitOptions(streams) + cmdFactory := cmdutil.NewFactory(options.configFlags) + + cmd := &cobra.Command{ + Use: "submit [OPTIONS] -f/--filename RAYJOB_YAML -- ENTRYPOINT", + Short: "Submit 
ray job to ray cluster", + Long: jobSubmitLong, + Example: jobSubmitExample, + RunE: func(cmd *cobra.Command, args []string) error { + entryPointStart := cmd.ArgsLenAtDash() + if entryPointStart == -1 || len(args[entryPointStart:]) == 0 { + return cmdutil.UsageErrorf(cmd, "%s", cmd.Use) + } + options.entryPoint = strings.Join(args[entryPointStart:], " ") + if err := options.Complete(); err != nil { + return err + } + if err := options.Validate(); err != nil { + return err + } + return options.Run(cmd.Context(), cmdFactory) + }, + } + cmd.Flags().StringVarP(&options.fileName, "filename", "f", options.fileName, "Path and name of the Ray Job YAML file") + cmd.Flags().StringVar(&options.submissionID, "submission-id", options.submissionID, "ID to specify for the ray job. If not provided, one will be generated") + cmd.Flags().StringVar(&options.runtimeEnv, "runtime-env", options.runtimeEnv, "Path and name to the runtime env YAML file.") + cmd.Flags().StringVar(&options.workingDir, "working-dir", options.workingDir, "Directory containing files that your job will run in") + cmd.Flags().StringVar(&options.headers, "headers", options.headers, "Used to pass headers through http/s to Ray Cluster. Must be JSON formatting") + cmd.Flags().StringVar(&options.runtimeEnvJson, "runtime-env-json", options.runtimeEnvJson, "JSON-serialized runtime_env dictionary. 
Precedence over ray job CR.") + cmd.Flags().StringVar(&options.verify, "verify", options.verify, "Boolean indication to verify the server’s TLS certificate or a path to a file or directory of trusted certificates.") + cmd.Flags().StringVar(&options.entryPointResource, "entrypoint-resources", options.entryPointResource, "JSON-serialized dictionary mapping resource name to resource quantity") + cmd.Flags().StringVar(&options.metadataJson, "metadata-json", options.metadataJson, "JSON-serialized dictionary of metadata to attach to the job.") + cmd.Flags().StringVar(&options.logStyle, "log-style", options.logStyle, "Specific to 'ray job submit'. Options are 'auto | record | pretty'") + cmd.Flags().StringVar(&options.logColor, "log-color", options.logColor, "Specific to 'ray job submit'. Options are 'auto | false | true'") + cmd.Flags().Float32Var(&options.entryPointCPU, "entrypoint-num-cpus", options.entryPointCPU, "Number of CPU reserved for the for the entrypoint command") + cmd.Flags().Float32Var(&options.entryPointGPU, "entrypoint-num-gpus", options.entryPointGPU, "Number of GPU reserved for the for the entrypoint command") + cmd.Flags().IntVar(&options.entryPointMemory, "entrypoint-memory", options.entryPointMemory, "Amount of memory reserved for the entrypoint command") + cmd.Flags().BoolVar(&options.noWait, "no-wait", options.noWait, "If present, will not stream logs and wait for job to finish") + + cmd.Flags().StringVar(&options.rayjobName, "name", "", "Name of the ray job that will be generated") + cmd.Flags().StringVar(&options.rayVersion, "ray-version", "2.39.0", "Ray Version to use in the Ray Cluster yaml.") + cmd.Flags().StringVar(&options.image, "image", "rayproject/ray:2.39.0", "Ray image to use in the Ray Cluster yaml") + cmd.Flags().StringVar(&options.headCPU, "head-cpu", "2", "Number of CPU for the ray head") + cmd.Flags().StringVar(&options.headMemory, "head-memory", "4Gi", "Amount of memory to use for the ray head") + 
cmd.Flags().Int32Var(&options.workerReplicas, "worker-replicas", 1, "Number of the worker group replicas") + cmd.Flags().StringVar(&options.workerCPU, "worker-cpu", "2", "Number of CPU for the ray worker") + cmd.Flags().StringVar(&options.workerMemory, "worker-memory", "4Gi", "Amount of memory to use for the ray worker") + cmd.Flags().BoolVar(&options.dryRun, "dry-run", false, "Will not apply the generated cluster and will print out the generated yaml. Only works when filename is not provided") + + options.configFlags.AddFlags(cmd.Flags()) + return cmd +} + +func (options *SubmitJobOptions) Complete() error { + if *options.configFlags.Namespace == "" { + *options.configFlags.Namespace = "default" + } + + if len(options.runtimeEnv) > 0 { + options.runtimeEnv = filepath.Clean(options.runtimeEnv) + } + + if options.fileName != "" { + options.fileName = filepath.Clean(options.fileName) + } + return nil +} + +func (options *SubmitJobOptions) Validate() error { + // Overrides and binds the kube config then retrieves the merged result + config, err := options.configFlags.ToRawKubeConfigLoader().RawConfig() + if err != nil { + return fmt.Errorf("Error retrieving raw config: %w", err) + } + if len(config.CurrentContext) == 0 { + return fmt.Errorf("no context is currently set, use %q to select a new one", "kubectl config use-context ") + } + + if len(options.runtimeEnv) > 0 { + info, err := os.Stat(options.runtimeEnv) + if os.IsNotExist(err) { + return fmt.Errorf("Runtime Env file does not exist. Failed with: %w", err) + } else if err != nil { + return fmt.Errorf("Error occurred when checking runtime env file: %w", err) + } else if !info.Mode().IsRegular() { + return fmt.Errorf("Filename given is not a regular file. 
Failed with: %w", err) + } + + runtimeEnvWorkingDir, err := runtimeEnvHasWorkingDir(options.runtimeEnv) + if err != nil { + return fmt.Errorf("Error while checking runtime env: %w", err) + } + if len(runtimeEnvWorkingDir) > 0 && options.workingDir == "" { + options.workingDir = runtimeEnvWorkingDir + } + } + + // Take care of case where there is a filename input + if options.fileName != "" { + info, err := os.Stat(options.fileName) + if os.IsNotExist(err) { + return fmt.Errorf("Ray Job file does not exist. Failed with: %w", err) + } else if err != nil { + return fmt.Errorf("Error occurred when checking ray job file: %w", err) + } else if !info.Mode().IsRegular() { + return fmt.Errorf("Filename given is not a regular file. Failed with: %w", err) + } + + options.RayJob, err = decodeRayJobYaml(options.fileName) + if err != nil { + return fmt.Errorf("Failed to decode RayJob Yaml: %w", err) + } + + submissionMode := options.RayJob.Spec.SubmissionMode + if submissionMode != rayv1.InteractiveMode { + return fmt.Errorf("Submission mode of the Ray Job must be set to 'InteractiveMode'") + } + + runtimeEnvYaml := options.RayJob.Spec.RuntimeEnvYAML + if options.runtimeEnv == "" && options.runtimeEnvJson == "" { + runtimeJson, err := yaml.YAMLToJSON([]byte(runtimeEnvYaml)) + if err != nil { + return fmt.Errorf("Failed to convert runtime env to json: %w", err) + } + options.runtimeEnvJson = string(runtimeJson) + } + } else if strings.TrimSpace(options.rayjobName) == "" { + return fmt.Errorf("Must set either yaml file (--filename) or set ray job name (--name)") + } + + if options.workingDir == "" { + return fmt.Errorf("working directory is required, use --working-dir or set with runtime env") + } + + // Changed working dir clean to here instead of complete since calling Clean on empty string return "." and it would be dificult to determine if that is actually user input or not. 
+ options.workingDir = filepath.Clean(options.workingDir) + return nil +} + +func (options *SubmitJobOptions) Run(ctx context.Context, factory cmdutil.Factory) error { + k8sClients, err := client.NewClient(factory) + if err != nil { + return fmt.Errorf("failed to initialize clientset: %w", err) + } + + if options.fileName == "" { + // Genarate the ray job. + rayJobObject := generation.RayJobYamlObject{ + RayJobName: options.rayjobName, + Namespace: *options.configFlags.Namespace, + SubmissionMode: "InteractiveMode", + RayClusterSpecObject: generation.RayClusterSpecObject{ + RayVersion: options.rayVersion, + Image: options.image, + HeadCPU: options.headCPU, + HeadMemory: options.headMemory, + WorkerCPU: options.workerCPU, + WorkerMemory: options.workerMemory, + WorkerReplicas: options.workerReplicas, + }, + } + rayJobApplyConfig := rayJobObject.GenerateRayJobApplyConfig() + + // Print out the yaml if it is a dry run + if options.dryRun { + resultYaml, err := generation.ConvertRayJobApplyConfigToYaml(rayJobApplyConfig) + if err != nil { + return fmt.Errorf("Failed to convert rayjob into yaml format: %w", err) + } + + fmt.Printf("%s\n", resultYaml) + return nil + } + + // Apply the generated yaml + rayJobApplyConfigResult, err := k8sClients.RayClient().RayV1().RayJobs(*options.configFlags.Namespace).Apply(ctx, rayJobApplyConfig, v1.ApplyOptions{FieldManager: "ray-kubectl-plugin"}) + if err != nil { + return fmt.Errorf("Failed to apply generated yaml: %w", err) + } + options.RayJob = &rayv1.RayJob{} + options.RayJob.SetName(rayJobApplyConfigResult.Name) + } else { + options.RayJob, err = k8sClients.RayClient().RayV1().RayJobs(*options.configFlags.Namespace).Create(ctx, options.RayJob, v1.CreateOptions{}) + if err != nil { + return fmt.Errorf("Error when creating RayJob CR: %w", err) + } + } + fmt.Printf("Submitted RayJob %s.\n", options.RayJob.GetName()) + + if len(options.RayJob.GetName()) > 0 { + // Add timeout? 
+ for len(options.RayJob.Status.RayClusterName) == 0 { + options.RayJob, err = k8sClients.RayClient().RayV1().RayJobs(*options.configFlags.Namespace).Get(ctx, options.RayJob.GetName(), v1.GetOptions{}) + if err != nil { + return fmt.Errorf("Failed to get Ray Job status") + } + time.Sleep(2 * time.Second) + } + options.cluster = options.RayJob.Status.RayClusterName + } else { + return fmt.Errorf("Unknown cluster and did not provide Ray Job. One of the fields must be set") + } + + // Wait til the cluster is ready + var clusterReady bool + clusterWaitStartTime := time.Now() + currTime := clusterWaitStartTime + fmt.Printf("Waiting for RayCluster\n") + fmt.Printf("Checking Cluster Status for cluster %s...\n", options.cluster) + for !clusterReady && currTime.Sub(clusterWaitStartTime).Seconds() <= clusterTimeout { + time.Sleep(2 * time.Second) + currCluster, err := k8sClients.RayClient().RayV1().RayClusters(*options.configFlags.Namespace).Get(ctx, options.cluster, v1.GetOptions{}) + if err != nil { + return fmt.Errorf("Failed to get cluster information with error: %w", err) + } + clusterReady = isRayClusterReady(currCluster) + if !clusterReady { + err = fmt.Errorf("Cluster is not ready: %w", err) + fmt.Println(err) + } + currTime = time.Now() + } + + if !clusterReady { + fmt.Printf("Deleting RayJob...\n") + err = k8sClients.RayClient().RayV1().RayJobs(*options.configFlags.Namespace).Delete(ctx, options.RayJob.GetName(), v1.DeleteOptions{}) + if err != nil { + return fmt.Errorf("Failed to clean up ray job after time out.: %w", err) + } + fmt.Printf("Cleaned Up RayJob: %s\n", options.RayJob.GetName()) + + return fmt.Errorf("Timed out waiting for cluster") + } + + svcName, err := k8sClients.GetRayHeadSvcName(ctx, *options.configFlags.Namespace, util.RayCluster, options.cluster) + if err != nil { + return fmt.Errorf("Failed to find service name: %w", err) + } + + // start port forward section + portForwardCmd := portforward.NewCmdPortForward(factory, *options.ioStreams) + 
portForwardCmd.SetArgs([]string{"service/" + svcName, fmt.Sprintf("%d:%d", 8265, 8265)}) + + // create new context for port-forwarding so we can cancel the context to stop the port forwarding only + portforwardctx, cancel := context.WithCancel(ctx) + defer cancel() + go func() { + fmt.Printf("Port Forwarding service %s\n", svcName) + if err := portForwardCmd.ExecuteContext(portforwardctx); err != nil { + log.Fatalf("Error occurred while port-forwarding Ray dashboard: %v", err) + } + }() + + // Wait for port forward to be ready + var portforwardReady bool + portforwardWaitStartTime := time.Now() + currTime = portforwardWaitStartTime + + portforwardCheckRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, dashboardAddr, nil) + if err != nil { + return fmt.Errorf("Error occurred when trying to create request to probe cluster endpoint: %w", err) + } + httpClient := http.Client{ + Timeout: 5 * time.Second, + } + fmt.Printf("Waiting for portforwarding...") + for !portforwardReady && currTime.Sub(portforwardWaitStartTime).Seconds() <= portforwardtimeout { + time.Sleep(2 * time.Second) + rayDashboardResponse, err := httpClient.Do(portforwardCheckRequest) + if err != nil { + err = fmt.Errorf("Error occurred when waiting for portforwarding: %w", err) + fmt.Println(err) + } + if rayDashboardResponse.StatusCode >= 200 && rayDashboardResponse.StatusCode < 300 { + portforwardReady = true + } + rayDashboardResponse.Body.Close() + currTime = time.Now() + } + if !portforwardReady { + return fmt.Errorf("Timed out waiting for port forwarding") + } + fmt.Printf("Portforwarding started on %s\n", dashboardAddr) + + // Submitting ray job to cluster + raySubmitCmd, err := options.raySubmitCmd() + if err != nil { + return fmt.Errorf("failed to create Ray submit command with error: %w", err) + } + fmt.Printf("Ray command: %v\n", raySubmitCmd) + cmd := exec.Command(raySubmitCmd[0], raySubmitCmd[1:]...) 
//nolint:gosec // command is sanitized in raySubmitCmd() and file paths are cleaned in Complete() + + // Get the outputs/pipes for `ray job submit` outputs + rayCmdStdOut, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("Error while setting up `ray job submit` stdout: %w", err) + } + rayCmdStdErr, err := cmd.StderrPipe() + if err != nil { + return fmt.Errorf("Error while setting up `ray job submit` stderr: %w", err) + } + + go func() { + fmt.Printf("Running ray submit job command...\n") + err := cmd.Start() + if err != nil { + log.Fatalf("error occurred while running command %s: %v", fmt.Sprint(raySubmitCmd), err) + } + }() + + var rayJobID string + if options.submissionID != "" { + rayJobID = options.submissionID + } + // Make channel for retrieving rayJobID from output + rayJobIDChan := make(chan string) + + rayCmdStdOutScanner := bufio.NewScanner(rayCmdStdOut) + rayCmdStdErrScanner := bufio.NewScanner(rayCmdStdErr) + go func() { + for { + currStdToken := rayCmdStdOutScanner.Text() + // Running under assumption that scanner does not break up ray job name + if currStdToken != "" && rayJobID == "" && strings.Contains(currStdToken, "raysubmit") { + regexExp := regexp.MustCompile(`'([^']*raysubmit[^']*)'`) + // Search for rayjob name. 
Returns at least two string, first one has single quotes and second string does not have single quotes + match := regexExp.FindStringSubmatch(currStdToken) + if len(match) > 1 { + rayJobIDChan <- match[1] + } + } + if currStdToken != "" { + fmt.Println(currStdToken) + } + scanNotDone := rayCmdStdOutScanner.Scan() + if !scanNotDone { + break + } + } + }() + go func() { + for { + currErrToken := rayCmdStdErrScanner.Text() + if currErrToken != "" { + fmt.Fprintf(options.ioStreams.ErrOut, "%s\n", currErrToken) + } + scanNotDone := rayCmdStdErrScanner.Scan() + if !scanNotDone { + break + } + } + }() + + // Wait till rayJobID is populated + if rayJobID == "" { + rayJobID = <-rayJobIDChan + } + // Add annotation to RayJob with the correct ray job id and update the CR + options.RayJob, err = k8sClients.RayClient().RayV1().RayJobs(*options.configFlags.Namespace).Get(ctx, options.RayJob.GetName(), v1.GetOptions{}) + if err != nil { + return fmt.Errorf("Failed to get latest version of Ray Job") + } + + options.RayJob.Spec.JobId = rayJobID + + _, err = k8sClients.RayClient().RayV1().RayJobs(*options.configFlags.Namespace).Update(ctx, options.RayJob, v1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("Error occurred when trying to add job ID to rayJob: %w", err) + } + + // Wait for ray job submit to finish. 
+ err = cmd.Wait() + if err != nil { + return fmt.Errorf("Error occurred with ray job submit: %w", err) + } + return nil +} + +func (options *SubmitJobOptions) raySubmitCmd() ([]string, error) { + raySubmitCmd := []string{"ray", "job", "submit", "--address", dashboardAddr} + + if len(options.runtimeEnv) > 0 { + raySubmitCmd = append(raySubmitCmd, "--runtime-env", options.runtimeEnv) + } + if len(options.runtimeEnvJson) > 0 { + raySubmitCmd = append(raySubmitCmd, "--runtime-env-json", options.runtimeEnvJson) + } + if len(options.submissionID) > 0 { + raySubmitCmd = append(raySubmitCmd, "--submission-id", options.submissionID) + } + if options.entryPointCPU > 0 { + raySubmitCmd = append(raySubmitCmd, "--entrypoint-num-cpus", fmt.Sprintf("%f", options.entryPointCPU)) + } + if options.entryPointGPU > 0 { + raySubmitCmd = append(raySubmitCmd, "--entrypoint-num-gpus", fmt.Sprintf("%f", options.entryPointGPU)) + } + if options.entryPointMemory > 0 { + raySubmitCmd = append(raySubmitCmd, "--entrypoint-memory", fmt.Sprintf("%d", options.entryPointMemory)) + } + if len(options.entryPointResource) > 0 { + raySubmitCmd = append(raySubmitCmd, "--entrypoint-resource", options.entryPointResource) + } + if len(options.metadataJson) > 0 { + raySubmitCmd = append(raySubmitCmd, "--metadata-json", options.metadataJson) + } + if options.noWait { + raySubmitCmd = append(raySubmitCmd, "--no-wait") + } + if len(options.headers) > 0 { + raySubmitCmd = append(raySubmitCmd, "--headers", options.headers) + } + if len(options.verify) > 0 { + raySubmitCmd = append(raySubmitCmd, "--verify", options.verify) + } + if len(options.logStyle) > 0 { + raySubmitCmd = append(raySubmitCmd, "--log-style", options.logStyle) + } + if len(options.logColor) > 0 { + raySubmitCmd = append(raySubmitCmd, "--log-color", options.logColor) + } + + raySubmitCmd = append(raySubmitCmd, "--working-dir", options.workingDir) + + raySubmitCmd = append(raySubmitCmd, "--") + // Sanitize entrypoint + entryPointSanitized, err 
:= shlex.Split(options.entryPoint) + if err != nil { + return nil, err + } + raySubmitCmd = append(raySubmitCmd, entryPointSanitized...) + + return raySubmitCmd, nil +} + +// Decode rayjob yaml if we decide to submit job using kube client +func decodeRayJobYaml(rayJobFilePath string) (*rayv1.RayJob, error) { + decodedRayJob := &rayv1.RayJob{} + + rayJobYamlContent, err := os.ReadFile(rayJobFilePath) + if err != nil { + return nil, err + } + decoder := rayscheme.Codecs.UniversalDecoder() + + _, _, err = decoder.Decode(rayJobYamlContent, nil, decodedRayJob) + if err != nil { + return nil, err + } + + return decodedRayJob, nil +} + +func runtimeEnvHasWorkingDir(runtimePath string) (string, error) { + runtimeEnvFileContent, err := os.ReadFile(runtimePath) + if err != nil { + return "", err + } + + var runtimeEnvYaml map[string]interface{} + err = yaml.Unmarshal(runtimeEnvFileContent, &runtimeEnvYaml) + if err != nil { + return "", err + } + + workingDir := runtimeEnvYaml["working_dir"].(string) + if workingDir != "" { + return workingDir, nil + } + + return "", nil +} + +func isRayClusterReady(rayCluster *rayv1.RayCluster) bool { + return meta.IsStatusConditionTrue(rayCluster.Status.Conditions, "Ready") || rayCluster.Status.State == rayv1.Ready //nolint:staticcheck // Still need to check State even though it is deprecated +} diff --git a/kubectl-plugin/pkg/cmd/job/job_submit_test.go b/kubectl-plugin/pkg/cmd/job/job_submit_test.go new file mode 100644 index 00000000000..9d70c7e8b0f --- /dev/null +++ b/kubectl-plugin/pkg/cmd/job/job_submit_test.go @@ -0,0 +1,225 @@ +package job + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "k8s.io/cli-runtime/pkg/genericclioptions" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/tools/clientcmd/api" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +func TestRayJobSubmitComplete(t *testing.T) { + testStreams, _, _, _ := genericclioptions.NewTestIOStreams() + 
fakeSubmitJobOptions := NewJobSubmitOptions(testStreams) + fakeSubmitJobOptions.runtimeEnv = "././fake/path/to/env/yaml" + fakeSubmitJobOptions.fileName = "fake/path/to/rayjob.yaml" + + err := fakeSubmitJobOptions.Complete() + assert.Equal(t, "default", *fakeSubmitJobOptions.configFlags.Namespace) + assert.Nil(t, err) + assert.Equal(t, "fake/path/to/env/yaml", fakeSubmitJobOptions.runtimeEnv) +} + +func TestRayJobSubmitValidate(t *testing.T) { + testStreams, _, _, _ := genericclioptions.NewTestIOStreams() + + testNS, testContext, testBT, testImpersonate := "test-namespace", "test-contet", "test-bearer-token", "test-person" + + // Fake directory for kubeconfig + fakeDir, err := os.MkdirTemp("", "fake-dir") + assert.Nil(t, err) + defer os.RemoveAll(fakeDir) + + // Set up fake config for kubeconfig + config := &api.Config{ + Clusters: map[string]*api.Cluster{ + "test-cluster": { + Server: "https://fake-kubernetes-cluster.example.com", + InsecureSkipTLSVerify: true, // For testing purposes + }, + }, + Contexts: map[string]*api.Context{ + "my-fake-context": { + Cluster: "my-fake-cluster", + AuthInfo: "my-fake-user", + }, + }, + CurrentContext: "my-fake-context", + AuthInfos: map[string]*api.AuthInfo{ + "my-fake-user": { + Token: "", // Empty for testing without authentication + }, + }, + } + + fakeFile := filepath.Join(fakeDir, ".kubeconfig") + + err = clientcmd.WriteToFile(*config, fakeFile) + assert.Nil(t, err) + + fakeConfigFlags := &genericclioptions.ConfigFlags{ + Namespace: &testNS, + Context: &testContext, + KubeConfig: &fakeFile, + BearerToken: &testBT, + Impersonate: &testImpersonate, + ImpersonateGroup: &[]string{"fake-group"}, + } + + rayYaml := `apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: rayjob-sample +spec: + submissionMode: 'InteractiveMode'` + + rayJobYamlPath := filepath.Join(fakeDir, "rayjob-temp-*.yaml") + + file, err := os.Create(rayJobYamlPath) + assert.Nil(t, err) + defer file.Close() + + _, err = file.Write([]byte(rayYaml)) + 
assert.Nil(t, err) + + tests := []struct { + name string + opts *SubmitJobOptions + expectError string + }{ + { + name: "Test validation when no context is set", + opts: &SubmitJobOptions{ + configFlags: genericclioptions.NewConfigFlags(false), + ioStreams: &testStreams, + }, + expectError: "no context is currently set, use \"kubectl config use-context \" to select a new one", + }, + { + name: "Successful submit job validation with RayJob", + opts: &SubmitJobOptions{ + configFlags: fakeConfigFlags, + ioStreams: &testStreams, + fileName: rayJobYamlPath, + workingDir: "Fake/File/Path", + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := tc.opts.Validate() + if tc.expectError != "" { + assert.Equal(t, tc.expectError, err.Error()) + } else { + assert.Nil(t, err) + } + }) + } +} + +func TestDecodeRayJobYaml(t *testing.T) { + rayjobtmpfile, err := os.CreateTemp("./", "rayjob-temp-*.yaml") + assert.Nil(t, err) + + defer os.Remove(rayjobtmpfile.Name()) + + rayYaml := `apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: rayjob-sample +spec: + submissionMode: 'InteractiveMode'` + _, err = rayjobtmpfile.Write([]byte(rayYaml)) + assert.Nil(t, err) + + rayJobYamlActual, err := decodeRayJobYaml(filepath.Join("./", rayjobtmpfile.Name())) + assert.Nil(t, err) + + assert.Equal(t, "rayjob-sample", rayJobYamlActual.GetName()) + + submissionMode := rayJobYamlActual.Spec.SubmissionMode + assert.Equal(t, rayv1.InteractiveMode, submissionMode) +} + +func TestRuntimeEnvHasWorkingDir(t *testing.T) { + runtimeEnvFile, err := os.CreateTemp("./", "runtime-env-*.yaml") + assert.Nil(t, err) + + defer os.Remove(runtimeEnvFile.Name()) + + runTimeEnv := `pip: + - requests==2.26.0 + - pendulum==2.1.2 +env_vars: + counter_name: "test_counter" +working_dir: /fake/dir/ray_working_dir/ +` + _, err = runtimeEnvFile.Write([]byte(runTimeEnv)) + assert.Nil(t, err) + + runtimeEnvActual, err := runtimeEnvHasWorkingDir(filepath.Join("./", runtimeEnvFile.Name())) + 
assert.Nil(t, err) + + assert.NotEmpty(t, runtimeEnvActual) + assert.Equal(t, runtimeEnvActual, "/fake/dir/ray_working_dir/") +} + +func TestRaySubmitCmd(t *testing.T) { + testStreams, _, _, _ := genericclioptions.NewTestIOStreams() + fakeSubmitJobOptions := NewJobSubmitOptions(testStreams) + + fakeSubmitJobOptions.runtimeEnv = "/fake-runtime/path" + fakeSubmitJobOptions.runtimeEnvJson = "{\"env_vars\":{\"counter_name\":\"test_counter\"}" + fakeSubmitJobOptions.submissionID = "fake-submission-id12345" + fakeSubmitJobOptions.entryPointCPU = 2.0 + fakeSubmitJobOptions.entryPointGPU = 1.0 + fakeSubmitJobOptions.entryPointMemory = 600 + fakeSubmitJobOptions.entryPointResource = "{\"fake-resource\":{\"the-fake-resource\"}" + fakeSubmitJobOptions.noWait = true + fakeSubmitJobOptions.headers = "{\"requestHeaders\": {\"header\": \"header\"}}" + fakeSubmitJobOptions.verify = "True" + fakeSubmitJobOptions.workingDir = "/fake/working/dir" + fakeSubmitJobOptions.entryPoint = "python fake_python_script.py" + + actualCmd, err := fakeSubmitJobOptions.raySubmitCmd() + assert.Nil(t, err) + expectedCmd := []string{ + "ray", + "job", + "submit", + "--address", + dashboardAddr, + "--runtime-env", + "/fake-runtime/path", + "--runtime-env-json", + "{\"env_vars\":{\"counter_name\":\"test_counter\"}", + "--submission-id", + "fake-submission-id12345", + "--entrypoint-num-cpus", + "2.000000", + "--entrypoint-num-gpus", + "1.000000", + "--entrypoint-memory", + "600", + "--entrypoint-resource", + "{\"fake-resource\":{\"the-fake-resource\"}", + "--no-wait", + "--headers", + "{\"requestHeaders\": {\"header\": \"header\"}}", + "--verify", + "True", + "--working-dir", + "/fake/working/dir", + "--", + "python", + "fake_python_script.py", + } + + assert.Equal(t, expectedCmd, actualCmd) +} diff --git a/kubectl-plugin/pkg/cmd/log/log.go b/kubectl-plugin/pkg/cmd/log/log.go new file mode 100644 index 00000000000..9a898365397 --- /dev/null +++ b/kubectl-plugin/pkg/cmd/log/log.go @@ -0,0 +1,411 @@ 
+package log + +import ( + "archive/tar" + "bytes" + "context" + "errors" + "fmt" + "io" + "log" + "math" + "net/url" + "os" + "path" + "path/filepath" + "strings" + + "github.com/spf13/cobra" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/cli-runtime/pkg/genericclioptions" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/remotecommand" + cmdutil "k8s.io/kubectl/pkg/cmd/util" + "k8s.io/kubectl/pkg/util/templates" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util" + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client" + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util/completion" +) + +const filePathInPod = "/tmp/ray/session_latest/logs/" + +type ClusterLogOptions struct { + configFlags *genericclioptions.ConfigFlags + ioStreams *genericclioptions.IOStreams + Executor RemoteExecutor + outputDir string + nodeType string + ResourceName string + ResourceType util.ResourceType +} + +var ( + logLong = templates.LongDesc(` + Download logs from a RayCluster and save them to a directory. + `) + + logExample = templates.Examples(` + # Download logs from a RayCluster and save them to a directory with the RayCluster's name. 
Retrieves 'all' logs + kubectl ray log my-raycluster + + # Download logs from a RayCluster and save them to a directory named /path/to/dir + kubectl ray log my-raycluster --out-dir /path/to/dir + + # Download logs from a RayCluster, but only for the head node + kubectl ray log my-raycluster --node-type head + + # Download logs from a RayCluster, but only for the worker nodes + kubectl ray log my-raycluster --node-type worker + + # Download all (worker node and head node) the logs from a RayCluster + kubectl ray log my-raycluster --node-type all + `) + + // flag to check if output directory is generated and needs to be deleted + deleteOutputDir = false +) + +func NewClusterLogOptions(streams genericclioptions.IOStreams) *ClusterLogOptions { + return &ClusterLogOptions{ + configFlags: genericclioptions.NewConfigFlags(true), + ioStreams: &streams, + Executor: &DefaultRemoteExecutor{}, + } +} + +func NewClusterLogCommand(streams genericclioptions.IOStreams) *cobra.Command { + options := NewClusterLogOptions(streams) + // Initialize the factory for later use with the current config flag + cmdFactory := cmdutil.NewFactory(options.configFlags) + + cmd := &cobra.Command{ + Use: "log (RAYCLUSTER | TYPE/NAME) [--out-dir DIR_PATH] [--node-type all|head|worker]", + Short: "Get ray cluster log", + Long: logLong, + Example: logExample, + Aliases: []string{"logs"}, + SilenceUsage: true, + ValidArgsFunction: completion.RayClusterResourceNameCompletionFunc(cmdFactory), + RunE: func(cmd *cobra.Command, args []string) error { + if err := options.Complete(cmd, args); err != nil { + return err + } + if err := options.Validate(); err != nil { + return err + } + return options.Run(cmd.Context(), cmdFactory) + }, + } + cmd.Flags().StringVar(&options.outputDir, "out-dir", options.outputDir, "File Directory PATH of where to download the file logs to.") + cmd.Flags().StringVar(&options.nodeType, "node-type", options.nodeType, "Type of Ray node to download the files for, supports 'worker', 
'head', or 'all'") + options.configFlags.AddFlags(cmd.Flags()) + return cmd +} + +func (options *ClusterLogOptions) Complete(cmd *cobra.Command, args []string) error { + if len(args) != 1 { + return cmdutil.UsageErrorf(cmd, "%s", cmd.Use) + } + + if *options.configFlags.Namespace == "" { + *options.configFlags.Namespace = "default" + } + + typeAndName := strings.Split(args[0], "/") + if len(typeAndName) == 1 { + options.ResourceType = util.RayCluster + options.ResourceName = typeAndName[0] + } else { + if len(typeAndName) != 2 || typeAndName[1] == "" { + return cmdutil.UsageErrorf(cmd, "invalid resource type/name: %s", args[0]) + } + + switch strings.ToLower(typeAndName[0]) { + case string(util.RayCluster): + options.ResourceType = util.RayCluster + case string(util.RayJob): + options.ResourceType = util.RayJob + case string(util.RayService): + options.ResourceType = util.RayService + default: + return cmdutil.UsageErrorf(cmd, "unsupported resource type: %s", typeAndName[0]) + } + + options.ResourceName = typeAndName[1] + } + + if options.nodeType == "" { + options.nodeType = "all" + } else { + options.nodeType = strings.ToLower(options.nodeType) + } + + return nil +} + +func (options *ClusterLogOptions) Validate() error { + // Overrides and binds the kube config then retrieves the merged result + config, err := options.configFlags.ToRawKubeConfigLoader().RawConfig() + if err != nil { + return fmt.Errorf("Error retrieving raw config: %w", err) + } + if len(config.CurrentContext) == 0 { + return fmt.Errorf("no context is currently set, use %q to select a new one", "kubectl config use-context ") + } + + if options.outputDir == "" { + fmt.Fprintln(options.ioStreams.Out, "No output directory specified, creating dir under current directory using resource name.") + options.outputDir = options.ResourceName + err := os.MkdirAll(options.outputDir, 0o755) + if err != nil { + return fmt.Errorf("could not create directory with cluster name %s: %w", options.outputDir, err) + } 
+ deleteOutputDir = true + } + + switch options.nodeType { + case "all": + fmt.Fprintln(options.ioStreams.Out, "Command set to retrieve both head and worker node logs.") + case "head": + fmt.Fprintln(options.ioStreams.Out, "Command set to retrieve only head node logs.") + case "worker": + fmt.Fprintln(options.ioStreams.Out, "Command set to retrieve only worker node logs.") + default: + return fmt.Errorf("unknown node type `%s`", options.nodeType) + } + + info, err := os.Stat(options.outputDir) + if os.IsNotExist(err) { + return fmt.Errorf("Directory does not exist. Failed with: %w", err) + } else if err != nil { + return fmt.Errorf("Error occurred will checking directory: %w", err) + } else if !info.IsDir() { + return fmt.Errorf("Path is Not a directory. Please input a directory and try again") + } + + return nil +} + +func (options *ClusterLogOptions) Run(ctx context.Context, factory cmdutil.Factory) error { + clientSet, err := client.NewClient(factory) + if err != nil { + return fmt.Errorf("failed to retrieve kubernetes client set: %w", err) + } + + // Retrieve raycluster name for the non raycluster type node + var clusterName string + switch options.ResourceType { + case util.RayCluster: + clusterName = options.ResourceName + case util.RayJob: + rayJob, err := clientSet.RayClient().RayV1().RayJobs(*options.configFlags.Namespace).Get(ctx, options.ResourceName, v1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to retrieve rayjob info for %s: %w", options.ResourceName, err) + } + clusterName = rayJob.Status.RayClusterName + case util.RayService: + rayService, err := clientSet.RayClient().RayV1().RayServices(*options.configFlags.Namespace).Get(ctx, options.ResourceName, v1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to retrieve rayjob info for %s: %w", options.ResourceName, err) + } + clusterName = rayService.Status.ActiveServiceStatus.RayClusterName + default: + return fmt.Errorf("unsupported resource type: %s", options.ResourceType) + 
} + + // set the list options for the specified nodetype + var listopts v1.ListOptions + switch options.nodeType { + case "all": + listopts = v1.ListOptions{ + LabelSelector: fmt.Sprintf("ray.io/cluster=%s", clusterName), + } + case "head": + listopts = v1.ListOptions{ + LabelSelector: fmt.Sprintf("ray.io/node-type=head, ray.io/cluster=%s", clusterName), + } + case "worker": + listopts = v1.ListOptions{ + LabelSelector: fmt.Sprintf("ray.io/node-type=worker, ray.io/cluster=%s", clusterName), + } + default: + return fmt.Errorf("Unknown ray resource node type: %s", options.nodeType) + } + + // Get list of nodes that are considered the specified node type + rayNodes, err := clientSet.KubernetesClient().CoreV1().Pods(*options.configFlags.Namespace).List(ctx, listopts) + if err != nil { + return fmt.Errorf("failed to retrieve head node for RayCluster %s: %w", clusterName, err) + } + if len(rayNodes.Items) == 0 { + // Clean up the empty directory if the directory was generated. Since it will always be in current dir, only Remove() is used. + if deleteOutputDir { + os.Remove(options.outputDir) + } + return fmt.Errorf("No ray nodes found for resource %s", clusterName) + } + + // Get a list of logs of the ray nodes. 
+ var logList []*bytes.Buffer + for _, rayNode := range rayNodes.Items { + // Since the first container is always the ray container, we will retrieve the first container logs + containerName := rayNode.Spec.Containers[0].Name + request := clientSet.KubernetesClient().CoreV1().Pods(rayNode.Namespace).GetLogs(rayNode.Name, &corev1.PodLogOptions{Container: containerName}) + + podLogs, err := request.Stream(ctx) + if err != nil { + return fmt.Errorf("Error retrieving log for RayCluster node %s: %w", rayNode.Name, err) + } + defer podLogs.Close() + + // Get current logs: + buf := new(bytes.Buffer) + _, err = io.Copy(buf, podLogs) + if err != nil { + return fmt.Errorf("Failed to get read current logs for RayCluster Node %s: %w", rayNode.Name, err) + } + + logList = append(logList, buf) + } + + // Pod file name format is name of the ray node + for ind, logList := range logList { + curFilePath := filepath.Join(options.outputDir, rayNodes.Items[ind].Name, "stdout.log") + dirPath := filepath.Join(options.outputDir, rayNodes.Items[ind].Name) + err := os.MkdirAll(dirPath, 0o755) + if err != nil { + return fmt.Errorf("failed to create directory within path %s: %w", dirPath, err) + } + file, err := os.OpenFile(curFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644) + if err != nil { + return fmt.Errorf("failed to create/open file for kuberay-head with path: %s: %w", curFilePath, err) + } + defer file.Close() + + _, err = logList.WriteTo(file) + if err != nil { + return fmt.Errorf("failed to write to file for kuberay-head: %s: %w", rayNodes.Items[ind].Name, err) + } + + containerName := rayNodes.Items[ind].Spec.Containers[0].Name + req := clientSet.KubernetesClient().CoreV1().RESTClient(). + Get(). + Namespace(rayNodes.Items[ind].Namespace). + Resource("pods"). + Name(rayNodes.Items[ind].Name). + SubResource("exec"). + Param("container", containerName). 
+ VersionedParams(&corev1.PodExecOptions{ + Command: []string{"tar", "--warning=no-file-changed", "-cf", "-", "-C", filePathInPod, "."}, + Stdin: true, + Stdout: true, + Stderr: true, + TTY: false, + }, clientgoscheme.ParameterCodec) + + restconfig, err := factory.ToRESTConfig() + if err != nil { + return fmt.Errorf("failed to get restconfig: %w", err) + } + + exec, err := options.Executor.CreateExecutor(restconfig, req.URL()) + if err != nil { + return fmt.Errorf("failed to create executor with error: %w", err) + } + + err = options.downloadRayLogFiles(ctx, exec, rayNodes.Items[ind]) + if err != nil { + return fmt.Errorf("failed to download ray head log files with error: %w", err) + } + } + return nil +} + +// RemoteExecutor creates the executor for executing exec on the pod - provided for testing purposes +type RemoteExecutor interface { + CreateExecutor(restConfig *rest.Config, url *url.URL) (remotecommand.Executor, error) +} + +type DefaultRemoteExecutor struct{} + +// CreateExecutor returns the executor created by NewSPDYExecutor +func (dre *DefaultRemoteExecutor) CreateExecutor(restConfig *rest.Config, url *url.URL) (remotecommand.Executor, error) { + return remotecommand.NewSPDYExecutor(restConfig, "POST", url) +} + +// downloadRayLogFiles will use to the executor and retrieve the logs file from the inputted ray head +func (options *ClusterLogOptions) downloadRayLogFiles(ctx context.Context, exec remotecommand.Executor, rayNode corev1.Pod) error { + outreader, outStream := io.Pipe() + go func() { + defer outStream.Close() + err := exec.StreamWithContext(ctx, remotecommand.StreamOptions{ + Stdin: options.ioStreams.In, + Stdout: outStream, + Stderr: options.ioStreams.ErrOut, + Tty: false, + }) + if err != nil { + log.Fatalf("Error occurred while calling remote command: %v", err) + } + }() + + // Goes through the tar and create/copy them one by one into the destination dir + tarReader := tar.NewReader(outreader) + header, err := tarReader.Next() + if err != nil 
&& !errors.Is(err, io.EOF) { + return fmt.Errorf("error will extracting head tar file for ray head %s: %w", rayNode.Name, err) + } + + fmt.Fprintf(options.ioStreams.Out, "Downloading log for Ray Node %s\n", rayNode.Name) + for !errors.Is(err, io.EOF) { + if err != nil { + return fmt.Errorf("Error reading tar archive: %w", err) + } + + // Construct the full local path and a directory for the tmp file logs + localFilePath := filepath.Join(path.Clean(options.outputDir), path.Clean(rayNode.Name), path.Clean(header.Name)) + + switch header.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(localFilePath, 0o755); err != nil { + return fmt.Errorf("Error creating directory: %w", err) + } + case tar.TypeReg: + // Check for overflow: G115 + if header.Mode < 0 || header.Mode > math.MaxUint32 { + fmt.Fprintf(options.ioStreams.Out, "file mode out side of accceptable value %d skipping file", header.Mode) + } + // Create file and write contents + outFile, err := os.OpenFile(localFilePath, os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode)) //nolint:gosec // lint failing due to file mode conversion from uint64 to int32, checked above + if err != nil { + return fmt.Errorf("Error creating file: %w", err) + } + defer outFile.Close() + // This is to limit the copy size for a decompression bomb, currently set arbitrarily + for { + n, err := io.CopyN(outFile, tarReader, 1000000) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return fmt.Errorf("failed while writing to file: %w", err) + } + if n == 0 { + break + } + } + default: + fmt.Printf("Ignoring unsupported file type: %b", header.Typeflag) + } + + header, err = tarReader.Next() + if header == nil && err != nil && !errors.Is(err, io.EOF) { + return fmt.Errorf("error while extracting tar file with error: %w", err) + } + } + + return nil +} diff --git a/kubectl-plugin/pkg/cmd/log/log_test.go b/kubectl-plugin/pkg/cmd/log/log_test.go new file mode 100644 index 00000000000..4e8995621fa --- /dev/null +++ 
b/kubectl-plugin/pkg/cmd/log/log_test.go
@@ -0,0 +1,562 @@
+package log
+
+import (
+	"archive/tar"
+	"bytes"
+	"context"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
+	"github.com/spf13/cobra"
+	"github.com/stretchr/testify/assert"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/cli-runtime/pkg/genericclioptions"
+	"k8s.io/cli-runtime/pkg/genericiooptions"
+	"k8s.io/cli-runtime/pkg/resource"
+	"k8s.io/client-go/rest"
+	restclient "k8s.io/client-go/rest"
+	"k8s.io/client-go/rest/fake"
+	"k8s.io/client-go/tools/clientcmd"
+	"k8s.io/client-go/tools/clientcmd/api"
+	"k8s.io/client-go/tools/remotecommand"
+	cmdtesting "k8s.io/kubectl/pkg/cmd/testing"
+	"k8s.io/kubectl/pkg/scheme"
+)
+
+// Mocked NewSPDYExecutor
+var fakeNewSPDYExecutor = func(method string, url *url.URL, inputbuf *bytes.Buffer) (remotecommand.Executor, error) {
+	return &fakeExecutor{method: method, url: url, buf: inputbuf}, nil
+}
+
+// fakeExecutor is a remotecommand.Executor stand-in that replays the contents of buf to the stream's stdout.
+type fakeExecutor struct {
+	url    *url.URL
+	buf    *bytes.Buffer
+	method string
+}
+
+// Stream is needed for implementing remotecommand.Executor
+func (f *fakeExecutor) Stream(_ remotecommand.StreamOptions) error {
+	return nil
+}
+
+// downloadRayLogFiles uses StreamWithContext so this is the real function that we are mocking
+func (f *fakeExecutor) StreamWithContext(_ context.Context, options remotecommand.StreamOptions) error {
+	_, err := io.Copy(options.Stdout, f.buf)
+	return err
+}
+
+// createFakeTarFile creates the fake tar file that will be used for testing
+func createFakeTarFile() (*bytes.Buffer, error) {
+	// Create a buffer to hold the tar archive
+	tarbuff := new(bytes.Buffer)
+
+	// Create a tar writer
+	tw := tar.NewWriter(tarbuff)
+
+	// Define the files/directories to include
+	files := []struct {
+		ModTime time.Time
+		Name    string
+		Body    string
+		IsDir   bool
+		Mode    int64
+	}{
+		{time.Now(), "/", "", true, 0o755},
+		{time.Now(), "file1.txt", "This is the content of file1.txt\n", false, 0o644},
+		{time.Now(), "file2.txt", "Content of file2.txt inside subdir\n", false, 0o644},
+	}
+
+	// Add each file/directory to the tar archive
+	for _, file := range files {
+		hdr := &tar.Header{
+			Name:    file.Name,
+			Mode:    file.Mode,
+			ModTime: file.ModTime,
+			Size:    int64(len(file.Body)),
+		}
+		if file.IsDir {
+			hdr.Typeflag = tar.TypeDir
+		} else {
+			hdr.Typeflag = tar.TypeReg
+		}
+
+		// Write the header
+		if err := tw.WriteHeader(hdr); err != nil {
+			return nil, err
+		}
+
+		// Write the file content (if not a directory)
+		if !file.IsDir {
+			if _, err := tw.Write([]byte(file.Body)); err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	// Close the tar writer
+	if err := tw.Close(); err != nil {
+		return nil, err
+	}
+	return tarbuff, nil
+}
+
+// FakeRemoteExecutor is assigned to ClusterLogOptions.Executor so Run does not open a real exec stream.
+type FakeRemoteExecutor struct{}
+
+// CreateExecutor returns a fakeExecutor backed by an empty buffer; the rest config is ignored.
+func (dre *FakeRemoteExecutor) CreateExecutor(_ *rest.Config, url *url.URL) (remotecommand.Executor, error) {
+	return fakeNewSPDYExecutor("GET", url, new(bytes.Buffer))
+}
+
+// TestRayClusterLogComplete checks how Complete parses the positional argument into resource type, name and node type.
+func TestRayClusterLogComplete(t *testing.T) {
+	cmd := &cobra.Command{Use: "log"}
+
+	tests := []struct {
+		name                 string
+		nodeType             string
+		expectedResourceType util.ResourceType
+		expectedResourceName string
+		expectedNodeType     string
+		args                 []string
+		hasErr               bool
+	}{
+		{
+			name:                 "valide request with raycluster with empty nodetype input",
+			expectedResourceType: util.RayCluster,
+			expectedResourceName: "test-raycluster",
+			expectedNodeType:     "all",
+			args:                 []string{"test-raycluster"},
+			hasErr:               false,
+		},
+		{
+			name:                 "valide request with raycluster",
+			expectedResourceType: util.RayCluster,
+			expectedResourceName: "test-raycluster",
+			args:                 []string{"rayCluster/test-raycluster"},
+			expectedNodeType:     "all",
+			hasErr:               false,
+		},
+		{
+			name:                 "valide request with rayservice",
+			expectedResourceType: util.RayService,
+			expectedResourceName: "test-rayService",
+			args:                 []string{"rayService/test-rayService"},
+			expectedNodeType:     "all",
+			hasErr:               false,
+		},
+		{
+			name:                 "valide request with rayjob",
+			expectedResourceType: util.RayJob,
+			expectedResourceName: "test-rayJob",
+			args:                 []string{"rayJob/test-rayJob"},
+			expectedNodeType:     "all",
+			hasErr:               false,
+		},
+		{
+			name:   "invalid args (no args)",
+			args:   []string{},
+			hasErr: true,
+		},
+		{
+			name:   "invalid args (too many args)",
+			args:   []string{"raycluster/test-raycluster", "extra-arg"},
+			hasErr: true,
+		},
+		{
+			name:   "invalid args (no resource type)",
+			args:   []string{"/test-resource"},
+			hasErr: true,
+		},
+		{
+			name:   "invalid args (no resource name)",
+			args:   []string{"raycluster/"},
+			hasErr: true,
+		},
+		{
+			name:   "invalid args (invalid resource type)",
+			args:   []string{"invalid-type/test-resource"},
+			hasErr: true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			testStreams, _, _, _ := genericclioptions.NewTestIOStreams()
+			fakeClusterLogOptions := NewClusterLogOptions(testStreams)
+			err := fakeClusterLogOptions.Complete(cmd, tc.args)
+			if tc.hasErr {
+				assert.NotNil(t, err)
+			} else {
+				assert.Nil(t, err)
+				assert.Equal(t, tc.expectedResourceType, fakeClusterLogOptions.ResourceType)
+				assert.Equal(t, tc.expectedResourceName, fakeClusterLogOptions.ResourceName)
+				assert.Equal(t, tc.expectedNodeType, fakeClusterLogOptions.nodeType)
+			}
+		})
+	}
+}
+
+// TestRayClusterLogValidate runs Validate against a temporary kubeconfig and compares the returned error text.
+func TestRayClusterLogValidate(t *testing.T) {
+	testStreams, _, _, _ := genericclioptions.NewTestIOStreams()
+
+	testNS, testContext, testBT, testImpersonate := "test-namespace", "test-contet", "test-bearer-token", "test-person"
+
+	// Fake directory for kubeconfig
+	fakeDir, err := os.MkdirTemp("", "fake-config")
+	assert.Nil(t, err)
+	defer os.RemoveAll(fakeDir)
+
+	// Set up fake config for kubeconfig
+	config := &api.Config{
+		Clusters: map[string]*api.Cluster{
+			"test-cluster": {
+				Server:                "https://fake-kubernetes-cluster.example.com",
+				InsecureSkipTLSVerify: true, // For testing purposes
+			},
+		},
+		Contexts: map[string]*api.Context{
+			"my-fake-context": {
+				Cluster:  "my-fake-cluster",
+				AuthInfo: "my-fake-user",
+			},
+		},
+		CurrentContext: "my-fake-context",
+		AuthInfos: map[string]*api.AuthInfo{
+			"my-fake-user": {
+				Token: "", // Empty for testing without authentication
+			},
+		},
+	}
+
+	fakeFile := filepath.Join(fakeDir, ".kubeconfig")
+
+	if err := clientcmd.WriteToFile(*config, fakeFile); err != nil {
+		t.Fatalf("Failed to write kubeconfig to temp file: %v", err)
+	}
+
+	// Initialize the fake config flag with the fake kubeconfig and values
+	fakeConfigFlags := &genericclioptions.ConfigFlags{
+		Namespace:        &testNS,
+		Context:          &testContext,
+		KubeConfig:       &fakeFile,
+		BearerToken:      &testBT,
+		Impersonate:      &testImpersonate,
+		ImpersonateGroup: &[]string{"fake-group"},
+	}
+
+	tests := []struct {
+		name        string
+		opts        *ClusterLogOptions
+		expect      string
+		expectError string
+	}{
+		{
+			name: "Test validation when no context is set",
+			opts: &ClusterLogOptions{
+				configFlags:  genericclioptions.NewConfigFlags(false),
+				outputDir:    fakeDir,
+				ResourceName: "fake-cluster",
+				nodeType:     "head",
+				ioStreams:    &testStreams,
+			},
+			expectError: "no context is currently set, use \"kubectl config use-context \" to select a new one",
+		},
+		{
+			name: "Test validation when node type is `random-string`",
+			opts: &ClusterLogOptions{
+				// Use fake config to bypass the config flag checks
+				configFlags:  fakeConfigFlags,
+				outputDir:    fakeDir,
+				ResourceName: "fake-cluster",
+				nodeType:     "random-string",
+				ioStreams:    &testStreams,
+			},
+			expectError: "unknown node type `random-string`",
+		},
+		{
+			name: "Successful validation call",
+			opts: &ClusterLogOptions{
+				// Use fake config to bypass the config flag checks
+				configFlags:  fakeConfigFlags,
+				outputDir:    fakeDir,
+				ResourceName: "fake-cluster",
+				nodeType:     "head",
+				ioStreams:    &testStreams,
+			},
+			expectError: "",
+		},
+		{
+			name: "Validate output directory when no out-dir is set.",
+			opts: &ClusterLogOptions{
+				// Use fake config to bypass the config flag checks
+				configFlags:  fakeConfigFlags,
+				outputDir:    "",
+				ResourceName: "fake-cluster",
+				nodeType:     "head",
+				ioStreams:    &testStreams,
+			},
+			expectError: "",
+		},
+		{
+			name: "Failed validation call with output directory not exist",
+			opts: &ClusterLogOptions{
+				// Use fake config to bypass the config flag checks
+				configFlags:  fakeConfigFlags,
+				outputDir:    "randomPath-here",
+				ResourceName: "fake-cluster",
+				nodeType:     "head",
+				ioStreams:    &testStreams,
+			},
+			expectError: "Directory does not exist. Failed with: stat randomPath-here: no such file or directory",
+		},
+		{
+			name: "Failed validation call with output directory is file",
+			opts: &ClusterLogOptions{
+				// Use fake config to bypass the config flag checks
+				configFlags:  fakeConfigFlags,
+				outputDir:    fakeFile,
+				ResourceName: "fake-cluster",
+				nodeType:     "head",
+				ioStreams:    &testStreams,
+			},
+			expectError: "Path is Not a directory. Please input a directory and try again",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			// Capture this before Validate runs; checked afterwards it can never be true for a passing case.
+			outDirUnset := tc.opts.outputDir == ""
+			err := tc.opts.Validate()
+			if tc.expectError != "" {
+				// EqualError reports a nil error as a test failure instead of panicking on err.Error().
+				assert.EqualError(t, err, tc.expectError)
+				return
+			}
+			assert.NoError(t, err)
+			if outDirUnset {
+				// NOTE(review): presumably Validate defaults an empty outputDir to the resource name - confirm against log.go.
+				assert.Equal(t, tc.opts.ResourceName, tc.opts.outputDir)
+			}
+		})
+	}
+}
+
+// TestRayClusterLogRun drives Run against a fake REST client and checks the per-pod stdout.log files it writes.
+func TestRayClusterLogRun(t *testing.T) {
+	tf := cmdtesting.NewTestFactory().WithNamespace("test")
+	defer tf.Cleanup()
+
+	fakeDir, err := os.MkdirTemp("", "fake-directory")
+	assert.Nil(t, err)
+	defer os.RemoveAll(fakeDir)
+
+	testStreams, _, _, _ := genericiooptions.NewTestIOStreams()
+
+	fakeClusterLogOptions := NewClusterLogOptions(testStreams)
+	// Uses the mocked executor
+	fakeClusterLogOptions.Executor = &FakeRemoteExecutor{}
+	fakeClusterLogOptions.ResourceName = "test-cluster"
+	fakeClusterLogOptions.outputDir = fakeDir
+	fakeClusterLogOptions.ResourceType = util.RayCluster
+	fakeClusterLogOptions.nodeType = "all"
+
+	// Create list of fake ray heads
+	rayHeadsList := &v1.PodList{
+		ListMeta: metav1.ListMeta{
+			ResourceVersion: "15",
+		},
+		Items: []v1.Pod{
+			{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "test-cluster-kuberay-head-1",
+					Namespace: "test",
+					Labels: map[string]string{
+						"ray.io/group":    "headgroup",
+						"ray.io/clusters": "test-cluster",
+					},
+				},
+				Spec: v1.PodSpec{
+					Containers: []v1.Container{
+						{
+							Name:  "mycontainer",
+							Image: "nginx:latest",
+						},
+					},
+				},
+				Status: v1.PodStatus{
+					Phase: v1.PodRunning,
+					PodIP: "10.0.0.1",
+				},
+			},
+			{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "test-cluster-kuberay-head-2",
+					Namespace: "test",
+					Labels: map[string]string{
+						"ray.io/group":    "headgroup",
+						"ray.io/clusters": "test-cluster",
+					},
+				},
+				Spec: v1.PodSpec{
+					Containers: []v1.Container{
+						{
+							Name:  "anothercontainer",
+							Image: "busybox:latest",
+						},
+					},
+				},
+				Status: v1.PodStatus{
+					Phase: v1.PodPending,
+				},
+			},
+		},
+	}
+
+	// create logs for multiple head pods and turn them into io streams so they can be returned with the fake client
+	fakeLogs := []string{
+		"This is some fake log data for first pod.\nStill first pod logs\n",
+		"This is some fake log data for second pod.\nStill second pod logs\n",
+	}
+	logReader1 := io.NopCloser(bytes.NewReader([]byte(fakeLogs[0])))
+	logReader2 := io.NopCloser(bytes.NewReader([]byte(fakeLogs[1])))
+
+	// fakes the client and the REST calls.
+	codec := scheme.Codecs.LegacyCodec(scheme.Scheme.PrioritizedVersionsAllGroups()...)
+	tf.Client = &fake.RESTClient{
+		GroupVersion:         v1.SchemeGroupVersion,
+		NegotiatedSerializer: resource.UnstructuredPlusDefaultContentConfig().NegotiatedSerializer,
+		Client: fake.CreateHTTPClient(func(req *http.Request) (*http.Response, error) {
+			switch req.URL.Path {
+			case "/api/v1/pods":
+				return &http.Response{StatusCode: http.StatusOK, Header: cmdtesting.DefaultHeader(), Body: cmdtesting.ObjBody(codec, rayHeadsList)}, nil
+			case "/api/v1/namespaces/test/pods/test-cluster-kuberay-head-1/log":
+				return &http.Response{StatusCode: http.StatusOK, Header: cmdtesting.DefaultHeader(), Body: logReader1}, nil
+			case "/api/v1/namespaces/test/pods/test-cluster-kuberay-head-2/log":
+				return &http.Response{StatusCode: http.StatusOK, Header: cmdtesting.DefaultHeader(), Body: logReader2}, nil
+			default:
+				t.Fatalf("request url: %#v,and request: %#v", req.URL, req)
+				return nil, nil
+			}
+		}),
+	}
+
+	tf.ClientConfigVal = &restclient.Config{
+		ContentConfig: restclient.ContentConfig{GroupVersion: &v1.SchemeGroupVersion},
+	}
+
+	err = fakeClusterLogOptions.Run(context.Background(), tf)
+	assert.Nil(t, err)
+
+	// Check that the two directories are there
+	entries, err := os.ReadDir(fakeDir)
+	assert.Nil(t, err)
+	if !assert.Len(t, entries, 2) {
+		return
+	}
+
+	assert.Equal(t, "test-cluster-kuberay-head-1", entries[0].Name())
+	assert.Equal(t, "test-cluster-kuberay-head-2", entries[1].Name())
+
+	// Check every pod directory for the logs
+	for ind, entry := range entries {
+		currPath := filepath.Join(fakeDir, entry.Name())
+		currDir, err := os.ReadDir(currPath)
+		assert.Nil(t, err)
+		assert.Equal(t, 1, len(currDir))
+		actualContent, err := os.ReadFile(filepath.Join(currPath, "stdout.log"))
+		assert.Nil(t, err)
+		assert.Equal(t, fakeLogs[ind], string(actualContent))
+	}
+}
+
+// TestDownloadRayLogFiles feeds a fake tar stream to downloadRayLogFiles and checks the files written for the pod.
+func TestDownloadRayLogFiles(t *testing.T) {
+	fakeDir, err := os.MkdirTemp("", "fake-directory")
+	assert.Nil(t, err)
+	defer os.RemoveAll(fakeDir)
+
+	testStreams, _, _, _ := genericiooptions.NewTestIOStreams()
+
+	fakeClusterLogOptions := NewClusterLogOptions(testStreams)
+	fakeClusterLogOptions.ResourceName = "test-cluster"
+	fakeClusterLogOptions.outputDir = fakeDir
+
+	// create fake tar files to test
+	fakeTar, err := createFakeTarFile()
+	assert.Nil(t, err)
+
+	// Ray head needed for calling the downloadRayLogFiles command
+	rayHead := v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-cluster-kuberay-head-1",
+			Namespace: "test",
+			Labels: map[string]string{
+				"ray.io/group":    "headgroup",
+				"ray.io/clusters": "test-cluster",
+			},
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name:  "mycontainer",
+					Image: "nginx:latest",
+				},
+			},
+		},
+		Status: v1.PodStatus{
+			Phase: v1.PodRunning,
+			PodIP: "10.0.0.1",
+		},
+	}
+
+	executor, err := fakeNewSPDYExecutor("GET", &url.URL{}, fakeTar)
+	assert.Nil(t, err)
+
+	err = fakeClusterLogOptions.downloadRayLogFiles(context.Background(), executor, rayHead)
+	assert.Nil(t, err)
+
+	entries, err := os.ReadDir(fakeDir)
+	assert.Nil(t, err)
+	if !assert.Len(t, entries, 1) {
+		return
+	}
+
+	// Assert the files
+	assert.True(t, entries[0].IsDir())
+	files, err := os.ReadDir(filepath.Join(fakeDir, entries[0].Name()))
+	assert.Nil(t, err)
+	if !assert.Len(t, files, 2) {
+		return
+	}
+
+	expectedfileoutput := []struct {
+		Name string
+		Body string
+	}{
+		{"file1.txt", "This is the content of file1.txt\n"},
+		{"file2.txt", "Content of file2.txt inside subdir\n"},
+	}
+
+	// Goes through and check the temp directory with the downloaded files
+	for ind, file := range files {
+		fileInfo, err := file.Info()
+		assert.Nil(t, err)
+		curr := expectedfileoutput[ind]
+
+		assert.Equal(t, curr.Name, fileInfo.Name())
+		actualContent, err := os.ReadFile(filepath.Join(fakeDir, entries[0].Name(), file.Name()))
+		assert.Nil(t, err)
+		assert.Equal(t, curr.Body, string(actualContent))
+	}
+}
diff --git a/kubectl-plugin/pkg/cmd/ray.go b/kubectl-plugin/pkg/cmd/ray.go new
file mode 100644
index 00000000000..1d3a8116eb3
--- /dev/null
+++ b/kubectl-plugin/pkg/cmd/ray.go
@@ -0,0 +1,41 @@
+package cmd
+
+import (
+	"k8s.io/cli-runtime/pkg/genericiooptions"
+
+	"github.com/spf13/cobra"
+
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/cmd/create"
+	kubectlraydelete "github.com/ray-project/kuberay/kubectl-plugin/pkg/cmd/delete"
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/cmd/get"
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/cmd/job"
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/cmd/log"
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/cmd/session"
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/cmd/version"
+)
+
+// NewRayCommand builds the root "ray" command, which prints help when run bare, and registers every subcommand on it.
+func NewRayCommand(streams genericiooptions.IOStreams) *cobra.Command {
+	cmd := &cobra.Command{
+		Use:          "ray",
+		Short:        "ray kubectl plugin",
+		Long:         "Manage RayCluster resources.",
+		SilenceUsage: true,
+		Run: func(cmd *cobra.Command, args []string) {
+			cmd.HelpFunc()(cmd, args)
+		},
+		CompletionOptions: cobra.CompletionOptions{
+			DisableDefaultCmd: true,
+		},
+	}
+
+	cmd.AddCommand(get.NewGetCommand(streams))
+	cmd.AddCommand(session.NewSessionCommand(streams))
+	cmd.AddCommand(log.NewClusterLogCommand(streams))
+	cmd.AddCommand(job.NewJobCommand(streams))
+	cmd.AddCommand(version.NewVersionCommand(streams))
+	cmd.AddCommand(create.NewCreateCommand(streams))
+	cmd.AddCommand(kubectlraydelete.NewDeleteCommand(streams))
+
+	return cmd
+}
diff --git a/kubectl-plugin/pkg/cmd/session/session.go b/kubectl-plugin/pkg/cmd/session/session.go
new file mode 100644
index 00000000000..533a9f28d17
--- /dev/null
+++ b/kubectl-plugin/pkg/cmd/session/session.go
@@ -0,0 +1,224 @@
+package session
+
+import (
+	"context"
+	"fmt"
+	"os/exec"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client"
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/completion"
+	"github.com/spf13/cobra"
+	"k8s.io/cli-runtime/pkg/genericclioptions"
+	"k8s.io/cli-runtime/pkg/genericiooptions"
+	cmdutil "k8s.io/kubectl/pkg/cmd/util"
+	"k8s.io/kubectl/pkg/util/templates"
+)
+
+// appPort pairs a display name with the port that is forwarded under the same number locally.
+type appPort struct {
+	name string
+	port int
+}
+
+// SessionOptions holds the resource selected on the command line and the kubeconfig flags used to reach it.
+type SessionOptions struct {
+	configFlags  *genericclioptions.ConfigFlags
+	ioStreams    *genericiooptions.IOStreams
+	ResourceType util.ResourceType
+	ResourceName string
+	Namespace    string
+}
+
+var (
+	dashboardPort = appPort{
+		name: "Ray Dashboard",
+		port: 8265,
+	}
+	clientPort = appPort{
+		name: "Ray Interactive Client",
+		port: 10001,
+	}
+	servePort = appPort{
+		name: "Ray Serve",
+		port: 8000,
+	}
+)
+
+var (
+	sessionLong = templates.LongDesc(`
+		Forward local ports to the Ray resources.
+
+		Forward different local ports depending on the resource type: RayCluster, RayJob, or RayService.
+	`)
+
+	sessionExample = templates.Examples(`
+		# Without specifying the resource type, forward local ports to the RayCluster resource
+		kubectl ray session my-raycluster
+
+		# Forward local ports to the RayCluster resource
+		kubectl ray session raycluster/my-raycluster
+
+		# Forward local ports to the RayCluster used for the RayJob resource
+		kubectl ray session rayjob/my-rayjob
+
+		# Forward local ports to the RayCluster used for the RayService resource
+		kubectl ray session rayservice/my-rayservice
+	`)
+)
+
+// NewSessionOptions returns options bound to streams with a fresh set of kubeconfig flags.
+func NewSessionOptions(streams genericiooptions.IOStreams) *SessionOptions {
+	configFlags := genericclioptions.NewConfigFlags(true)
+	return &SessionOptions{
+		ioStreams:   &streams,
+		configFlags: configFlags,
+	}
+}
+
+// NewSessionCommand builds the "session" subcommand, which runs Complete, Validate and Run in that order.
+func NewSessionCommand(streams genericiooptions.IOStreams) *cobra.Command {
+	options := NewSessionOptions(streams)
+	factory := cmdutil.NewFactory(options.configFlags)
+
+	cmd := &cobra.Command{
+		Use:               "session (RAYCLUSTER | TYPE/NAME)",
+		Short:             "Forward local ports to the Ray resources.",
+		Long:              sessionLong,
+		Example:           sessionExample,
+		ValidArgsFunction: completion.RayClusterResourceNameCompletionFunc(factory),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			if err := options.Complete(cmd, args); err != nil {
+				return err
+			}
+			if err := options.Validate(); err != nil {
+				return err
+			}
+			return options.Run(cmd.Context(), factory)
+		},
+	}
+	options.configFlags.AddFlags(cmd.Flags())
+	return cmd
+}
+
+// Complete parses the single NAME or TYPE/NAME argument into ResourceType and ResourceName
+// (a bare NAME means RayCluster) and falls back to the "default" namespace when none is set.
+func (options *SessionOptions) Complete(cmd *cobra.Command, args []string) error {
+	if len(args) != 1 {
+		return cmdutil.UsageErrorf(cmd, "%s", cmd.Use)
+	}
+
+	typeAndName := strings.Split(args[0], "/")
+	if len(typeAndName) == 1 {
+		options.ResourceType = util.RayCluster
+		options.ResourceName = typeAndName[0]
+	} else {
+		if len(typeAndName) != 2 || typeAndName[1] == "" {
+			return cmdutil.UsageErrorf(cmd, "invalid resource type/name: %s", args[0])
+		}
+
+		switch typeAndName[0] {
+		case string(util.RayCluster):
+			options.ResourceType = util.RayCluster
+		case string(util.RayJob):
+			options.ResourceType = util.RayJob
+		case string(util.RayService):
+			options.ResourceType = util.RayService
+		default:
+			return cmdutil.UsageErrorf(cmd, "unsupported resource type: %s", typeAndName[0])
+		}
+
+		options.ResourceName = typeAndName[1]
+	}
+
+	if *options.configFlags.Namespace == "" {
+		options.Namespace = "default"
+	} else {
+		options.Namespace = *options.configFlags.Namespace
+	}
+
+	return nil
+}
+
+// Validate fails when the merged kubeconfig cannot be loaded or has no current context.
+func (options *SessionOptions) Validate() error {
+	// Overrides and binds the kube config then retrieves the merged result
+	config, err := options.configFlags.ToRawKubeConfigLoader().RawConfig()
+	if err != nil {
+		return fmt.Errorf("Error retrieving raw config: %w", err)
+	}
+	if len(config.CurrentContext) == 0 {
+		return fmt.Errorf("no context is currently set, use %q to select a new one", "kubectl config use-context ")
+	}
+	return nil
+}
+
+// Run looks up the head service of the selected resource and keeps "kubectl port-forward" running
+// against it for the ports of that resource type. It returns nil once kubectl exits cleanly or ctx is cancelled.
+func (options *SessionOptions) Run(ctx context.Context, factory cmdutil.Factory) error {
+	k8sClient, err := client.NewClient(factory)
+	if err != nil {
+		return fmt.Errorf("failed to create client: %w", err)
+	}
+
+	svcName, err := k8sClient.GetRayHeadSvcName(ctx, options.Namespace, options.ResourceType, options.ResourceName)
+	if err != nil {
+		return err
+	}
+	// Without a service name kubectl can never succeed and the loop below would retry forever.
+	if svcName == "" {
+		return fmt.Errorf("no head service name reported for %s %s/%s", options.ResourceType, options.Namespace, options.ResourceName)
+	}
+	fmt.Printf("Forwarding ports to service %s\n", svcName)
+
+	var appPorts []appPort
+	switch options.ResourceType {
+	case util.RayCluster:
+		appPorts = []appPort{dashboardPort, clientPort}
+	case util.RayJob:
+		appPorts = []appPort{dashboardPort}
+	case util.RayService:
+		appPorts = []appPort{dashboardPort, servePort}
+	default:
+		return fmt.Errorf("unsupported resource type: %s", options.ResourceType)
+	}
+
+	kubectlArgs := []string{"port-forward", "-n", options.Namespace, "service/" + svcName}
+	for _, appPort := range appPorts {
+		kubectlArgs = append(kubectlArgs, fmt.Sprintf("%d:%d", appPort.port, appPort.port))
+	}
+
+	for _, appPort := range appPorts {
+		fmt.Printf("%s: http://localhost:%d\n", appPort.name, appPort.port)
+	}
+	fmt.Println()
+
+	// Restart kubectl port-forward whenever it exits with an error; stop once it
+	// exits cleanly or ctx is cancelled.
+	const reconnectDelay = 100 * time.Millisecond
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		for {
+			// CommandContext kills the kubectl child process when ctx is cancelled.
+			portforwardCmd := exec.CommandContext(ctx, "kubectl", kubectlArgs...)
+			err := portforwardCmd.Run()
+			if err == nil || ctx.Err() != nil {
+				return
+			}
+			fmt.Printf("failed to port-forward: %v, try to reconnect after %d milliseconds...\n", err, reconnectDelay.Milliseconds())
+			select {
+			case <-ctx.Done():
+				return
+			case <-time.After(reconnectDelay):
+			}
+		}
+	}()
+
+	wg.Wait()
+	return nil
+}
diff --git a/kubectl-plugin/pkg/cmd/session/session_test.go b/kubectl-plugin/pkg/cmd/session/session_test.go
new file mode 100644
index 00000000000..015336d2783
--- /dev/null
+++ b/kubectl-plugin/pkg/cmd/session/session_test.go
@@ -0,0 +1,113 @@
+package session
+
+import (
+	"testing"
+
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
+	"github.com/spf13/cobra"
+	"github.com/stretchr/testify/assert"
+	"k8s.io/cli-runtime/pkg/genericiooptions"
+)
+
+// TestComplete checks argument parsing and namespace defaulting in SessionOptions.Complete.
+func TestComplete(t *testing.T) {
+	cmd := &cobra.Command{Use: "session"}
+
+	tests := []struct {
+		name                 string
+		namespace            string
+		expectedResourceType util.ResourceType
+		expectedNamespace    string
+		expectedName         string
+		args                 []string
+		hasErr               bool
+	}{
+		{
+			name:                 "valid raycluster without namespace",
+			namespace:            "",
+			args:                 []string{"raycluster/test-raycluster"},
+			expectedResourceType: util.RayCluster,
+			expectedNamespace:    "default",
+			expectedName:         "test-raycluster",
+			hasErr:               false,
+		},
+		{
+			name:                 "valid raycluster with namespace",
+			namespace:            "test-namespace",
+			args:                 []string{"raycluster/test-raycluster"},
+			expectedResourceType: util.RayCluster,
+			expectedNamespace:    "test-namespace",
+			expectedName:         "test-raycluster",
+			hasErr:               false,
+		},
+		{
+			name:                 "valid rayjob without namespace",
+			namespace:            "",
+			args:                 []string{"rayjob/test-rayjob"},
+			expectedResourceType: util.RayJob,
+			expectedNamespace:    "default",
+			expectedName:         "test-rayjob",
+			hasErr:               false,
+		},
+		{
+			name:                 "valid rayservice without namespace",
+			namespace:            "",
+			args:                 []string{"rayservice/test-rayservice"},
+			expectedResourceType: util.RayService,
+			expectedNamespace:    "default",
+			expectedName:         "test-rayservice",
+			hasErr:               false,
+		},
+		{
+			name:                 "no slash default to raycluster",
+			namespace:            "",
+			args:                 []string{"test-resource"},
+			expectedResourceType: util.RayCluster,
+			expectedNamespace:    "default",
+			expectedName:         "test-resource",
+			hasErr:               false,
+		},
+		{
+			name:   "invalid args (no args)",
+			args:   []string{},
+			hasErr: true,
+		},
+		{
+			name:   "invalid args (too many args)",
+			args:   []string{"raycluster/test-raycluster", "extra-arg"},
+			hasErr: true,
+		},
+		{
+			name:   "invalid args (no resource type)",
+			args:   []string{"/test-resource"},
+			hasErr: true,
+		},
+		{
+			name:   "invalid args (no resource name)",
+			args:   []string{"raycluster/"},
+			hasErr: true,
+		},
+		{
+			name:   "invalid args (invalid resource type)",
+			args:   []string{"invalid-type/test-resource"},
+			hasErr: true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			testStreams, _, _, _ := genericiooptions.NewTestIOStreams()
+			fakeSessionOptions := NewSessionOptions(testStreams)
+			fakeSessionOptions.configFlags.Namespace = &tc.namespace
+			err := fakeSessionOptions.Complete(cmd, tc.args)
+			if tc.hasErr {
+				assert.NotNil(t, err)
+			} else {
+				assert.Nil(t, err)
+				assert.Equal(t, tc.expectedNamespace, fakeSessionOptions.Namespace)
+				assert.Equal(t, tc.expectedResourceType, fakeSessionOptions.ResourceType)
+				assert.Equal(t, tc.expectedName, fakeSessionOptions.ResourceName)
+			}
+		})
+	}
+}
diff --git a/kubectl-plugin/pkg/cmd/version/version.go b/kubectl-plugin/pkg/cmd/version/version.go
new file mode 100644
index 00000000000..7028813cf19
--- /dev/null
+++ b/kubectl-plugin/pkg/cmd/version/version.go
@@ -0,0 +1,56 @@
+package version
+
+import (
+	"fmt"
+
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client"
+	"github.com/spf13/cobra"
+	"k8s.io/cli-runtime/pkg/genericclioptions"
+	cmdutil "k8s.io/kubectl/pkg/cmd/util"
+)
+
+// Version is the plugin version printed by the version command.
+var Version = "development"
+
+// VersionOptions holds the kubeconfig flags and IO streams for the version command.
+type VersionOptions struct {
+	configFlags *genericclioptions.ConfigFlags
+	ioStreams   *genericclioptions.IOStreams
+}
+
+// NewVersionOptions returns options bound to streams with a fresh set of kubeconfig flags.
+func NewVersionOptions(streams genericclioptions.IOStreams) *VersionOptions {
+	return &VersionOptions{
+		configFlags: genericclioptions.NewConfigFlags(true),
+		ioStreams:   &streams,
+	}
+}
+
+// NewVersionCommand builds the "version" subcommand; a failed operator lookup only prints a warning.
+func NewVersionCommand(streams genericclioptions.IOStreams) *cobra.Command {
+	options := NewVersionOptions(streams)
+	cmdFactory := cmdutil.NewFactory(options.configFlags)
+
+	cmd := &cobra.Command{
+		Use:   "version",
+		Short: "Output the version of the Ray kubectl plugin and KubeRay operator",
+		RunE: func(cmd *cobra.Command, _ []string) error {
+			fmt.Println("kubectl ray plugin version:", Version)
+
+			kubeClient, err := client.NewClient(cmdFactory)
+			if err != nil {
+				return fmt.Errorf("failed to create client: %w", err)
+			}
+
+			operatorVersion, err := kubeClient.GetKubeRayOperatorVersion(cmd.Context())
+			if err != nil {
+				fmt.Println("Warning: KubeRay operator installation cannot be found - did you install it with the name \"kuberay-operator\"?")
+			} else {
+				fmt.Println("KubeRay operator version:", operatorVersion)
+			}
+			return nil
+		},
+	}
+
+	return cmd
+}
diff --git a/kubectl-plugin/pkg/util/client/client.go b/kubectl-plugin/pkg/util/client/client.go
new file mode 100644
index 00000000000..8eb71038bd7
--- /dev/null
+++ b/kubectl-plugin/pkg/util/client/client.go
@@ -0,0 +1,163 @@
+package client
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	cmdutil "k8s.io/kubectl/pkg/cmd/util"
+
+	rayclient "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned"
+)
+
+// Client bundles the Kubernetes and Ray clientsets with the lookups the plugin commands need.
+type Client interface {
+	KubernetesClient() kubernetes.Interface
+	RayClient() rayclient.Interface
+	// GetRayHeadSvcName retrieves the name of RayHead service for the given RayCluster, RayJob, or RayService.
+	GetRayHeadSvcName(ctx context.Context, namespace string, resourceType util.ResourceType, name string) (string, error)
+	GetKubeRayOperatorVersion(ctx context.Context) (string, error)
+}
+
+// k8sClient is the Client implementation backed by a pair of clientsets.
+type k8sClient struct {
+	kubeClient kubernetes.Interface
+	rayClient  rayclient.Interface
+}
+
+// NewClient builds a Client from the factory's Kubernetes clientset and REST config.
+func NewClient(factory cmdutil.Factory) (Client, error) {
+	kubeClient, err := factory.KubernetesClientSet()
+	if err != nil {
+		return nil, err
+	}
+	restConfig, err := factory.ToRESTConfig()
+	if err != nil {
+		return nil, err
+	}
+
+	rayClient, err := rayclient.NewForConfig(restConfig)
+	if err != nil {
+		return nil, err
+	}
+
+	return &k8sClient{
+		kubeClient: kubeClient,
+		rayClient:  rayClient,
+	}, nil
+}
+
+// NewClientForTesting wraps caller-supplied clientsets so tests can inject fakes.
+func NewClientForTesting(kubeClient kubernetes.Interface, rayClient rayclient.Interface) Client {
+	return &k8sClient{
+		kubeClient: kubeClient,
+		rayClient:  rayClient,
+	}
+}
+
+// KubernetesClient returns the underlying Kubernetes clientset.
+func (c *k8sClient) KubernetesClient() kubernetes.Interface {
+	return c.kubeClient
+}
+
+// RayClient returns the underlying Ray clientset.
+func (c *k8sClient) RayClient() rayclient.Interface {
+	return c.rayClient
+}
+
+// GetKubeRayOperatorVersion lists operator deployments in all namespaces by label and returns the
+// image tag of the first container of the first match.
+func (c *k8sClient) GetKubeRayOperatorVersion(ctx context.Context) (string, error) {
+	deployment, err := c.kubeClient.AppsV1().Deployments("").List(ctx, metav1.ListOptions{
+		LabelSelector: "app.kubernetes.io/name in (kuberay-operator,kuberay)",
+	})
+	if err != nil {
+		return "", fmt.Errorf("failed to get KubeRay operator deployment: %w", err)
+	}
+
+	if len(deployment.Items) == 0 {
+		return "", fmt.Errorf("no KubeRay operator deployments found in any namespace")
+	}
+
+	containers := deployment.Items[0].Spec.Template.Spec.Containers
+	if len(containers) == 0 {
+		return "", fmt.Errorf("no containers found in KubeRay operator deployment")
+	}
+
+	image := containers[0].Image
+	version, ok := parseImageTag(image)
+	if !ok {
+		return "", fmt.Errorf("unable to parse KubeRay operator version from image: %s", image)
+	}
+
+	return version, nil
+}
+
+// parseImageTag extracts the tag from an image reference such as "repo/name:tag" or
+// "host:port/repo/name:tag@digest". When the reference has a digest but no tag, the digest
+// is returned instead. ok is false when neither is present.
+func parseImageTag(image string) (string, bool) {
+	name, digest := image, ""
+	if at := strings.Index(image, "@"); at >= 0 {
+		name, digest = image[:at], image[at+1:]
+	}
+	// A colon before the last slash is a registry port ("localhost:5000/repo"), not a tag.
+	if colon := strings.LastIndex(name, ":"); colon > strings.LastIndex(name, "/") && colon < len(name)-1 {
+		return name[colon+1:], true
+	}
+	// Images pinned only by digest carry no tag; report the digest instead.
+	return digest, digest != ""
+}
+
+// GetRayHeadSvcName dispatches to the lookup that matches resourceType.
+func (c *k8sClient) GetRayHeadSvcName(ctx context.Context, namespace string, resourceType util.ResourceType, name string) (string, error) {
+	switch resourceType {
+	case util.RayCluster:
+		return c.getRayHeadSvcNameByRayCluster(ctx, namespace, name)
+	case util.RayJob:
+		return c.getRayHeadSvcNameByRayJob(ctx, namespace, name)
+	case util.RayService:
+		return c.getRayHeadSvcNameByRayService(ctx, namespace, name)
+	default:
+		return "", fmt.Errorf("unsupported resource type: %s", resourceType)
+	}
+}
+
+// getRayHeadSvcNameByRayCluster returns the head service name recorded in the RayCluster status.
+func (c *k8sClient) getRayHeadSvcNameByRayCluster(ctx context.Context, namespace string, name string) (string, error) {
+	rayCluster, err := c.RayClient().RayV1().RayClusters(namespace).Get(ctx, name, metav1.GetOptions{})
+	if err != nil {
+		return "", fmt.Errorf("unable to find RayCluster %s: %w", name, err)
+	}
+	svcName := rayCluster.Status.Head.ServiceName
+	return svcName, nil
+}
+
+// getRayHeadSvcNameByRayJob returns the head service name recorded in the RayJob's cluster status.
+func (c *k8sClient) getRayHeadSvcNameByRayJob(ctx context.Context, namespace string, name string) (string, error) {
+	rayJob, err := c.RayClient().RayV1().RayJobs(namespace).Get(ctx, name, metav1.GetOptions{})
+	if err != nil {
+		return "", fmt.Errorf("unable to find RayJob %s: %w", name, err)
+	}
+	svcName := rayJob.Status.RayClusterStatus.Head.ServiceName
+	return svcName, nil
+}
+
+// There are 3 services associated with a RayService:
+// - -head-svc
+// - -serve-svc
+// - -head-svc
+// This function retrieves the name of the -head-svc service.
+// Actually there is no difference between which service to use, because kubectl port-forward source code first tries to find the underlying pod.
+// See https://github.com/kubernetes/kubectl/blob/262825a8a665c7cae467dfaa42b63be5a5b8e5a2/pkg/cmd/portforward/portforward.go#L345 for details.
+func (c *k8sClient) getRayHeadSvcNameByRayService(ctx context.Context, namespace string, name string) (string, error) { + rayService, err := c.RayClient().RayV1().RayServices(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return "", fmt.Errorf("unable to find RayService %s: %w", name, err) + } + svcName := rayService.Status.ActiveServiceStatus.RayClusterStatus.Head.ServiceName + return svcName, nil +} diff --git a/kubectl-plugin/pkg/util/client/client_test.go b/kubectl-plugin/pkg/util/client/client_test.go new file mode 100644 index 00000000000..b0111f2bcaa --- /dev/null +++ b/kubectl-plugin/pkg/util/client/client_test.go @@ -0,0 +1,331 @@ +package client + +import ( + "context" + "testing" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util" + "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + kubeFake "k8s.io/client-go/kubernetes/fake" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayClientFake "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/fake" +) + +func TestGetKubeRayOperatorVersion(t *testing.T) { + helmKubeObjects := []runtime.Object{ + &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kuberay-operator-helm-chart", + Namespace: "default", + Labels: map[string]string{ + "app.kubernetes.io/name": "kuberay-operator", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Image: "kuberay/operator:v0.5.0", + }, + }, + }, + }, + }, + }, + } + kustomizeObjects := []runtime.Object{ + &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kuberay-operator-kustomize", + Namespace: "test", + Labels: map[string]string{ + "app.kubernetes.io/name": "kuberay", + }, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ 
+ Containers: []corev1.Container{ + { + Image: "kuberay/operator:v0.6.0", + }, + }, + }, + }, + }, + }, + } + + tests := []struct { + name string + expectedVersion string + expectedError string + kubeObjects []runtime.Object + }{ + { + name: "kubeRay operator not found", + expectedVersion: "", + expectedError: "no KubeRay operator deployments found in any namespace", + kubeObjects: nil, + }, + { + name: "find kubeRay operator version for helm chart", + expectedVersion: "v0.5.0", + expectedError: "", + kubeObjects: helmKubeObjects, + }, + { + name: "find kubeRay operator version for Kustomize", + expectedVersion: "v0.6.0", + expectedError: "", + kubeObjects: kustomizeObjects, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + kubeClientSet := kubeFake.NewClientset(tc.kubeObjects...) + client := NewClientForTesting(kubeClientSet, nil) + + version, err := client.GetKubeRayOperatorVersion(context.Background()) + + if tc.expectedVersion != "" { + assert.Equal(t, version, tc.expectedVersion) + assert.NoError(t, err) + } else { + assert.EqualError(t, err, tc.expectedError) + } + }) + } +} + +func TestGetRayHeadSvcNameByRayCluster(t *testing.T) { + kubeObjects := []runtime.Object{} + + rayObjects := []runtime.Object{ + &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "raycluster-default", + Namespace: "default", + }, + Status: rayv1.RayClusterStatus{ + Head: rayv1.HeadInfo{ + ServiceName: "raycluster-default-head-svc", + }, + }, + }, + &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "raycluster-test", + Namespace: "test", + }, + Status: rayv1.RayClusterStatus{ + Head: rayv1.HeadInfo{ + ServiceName: "raycluster-test-head-svc", + }, + }, + }, + } + + kubeClientSet := kubeFake.NewClientset(kubeObjects...) + rayClient := rayClientFake.NewSimpleClientset(rayObjects...) 
+ client := NewClientForTesting(kubeClientSet, rayClient) + + tests := []struct { + name string + namespace string + resourceName string + serviceName string + }{ + { + name: "find service name in default namespace", + namespace: "default", + resourceName: "raycluster-default", + serviceName: "raycluster-default-head-svc", + }, + { + name: "find service name in test namespace", + namespace: "test", + resourceName: "raycluster-test", + serviceName: "raycluster-test-head-svc", + }, + { + name: "resource not found", + namespace: "default", + resourceName: "raycluster-not-found", + serviceName: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + svcName, err := client.GetRayHeadSvcName(context.Background(), tc.namespace, util.RayCluster, tc.resourceName) + if tc.serviceName == "" { + assert.NotNil(t, err) + } else { + assert.Nil(t, err) + assert.Equal(t, tc.serviceName, svcName) + } + }) + } +} + +func TestGetRayHeadSvcNameByRayJob(t *testing.T) { + kubeObjects := []runtime.Object{} + + rayObjects := []runtime.Object{ + &rayv1.RayJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rayjob-default", + Namespace: "default", + }, + Status: rayv1.RayJobStatus{ + RayClusterStatus: rayv1.RayClusterStatus{ + Head: rayv1.HeadInfo{ + ServiceName: "rayjob-default-raycluster-xxxxx-head-svc", + }, + }, + }, + }, + &rayv1.RayJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rayjob-test", + Namespace: "test", + }, + Status: rayv1.RayJobStatus{ + RayClusterStatus: rayv1.RayClusterStatus{ + Head: rayv1.HeadInfo{ + ServiceName: "rayjob-test-raycluster-xxxxx-head-svc", + }, + }, + }, + }, + } + + kubeClientSet := kubeFake.NewClientset(kubeObjects...) + rayClient := rayClientFake.NewSimpleClientset(rayObjects...) 
+ client := NewClientForTesting(kubeClientSet, rayClient) + + tests := []struct { + name string + namespace string + resourceName string + serviceName string + }{ + { + name: "find service name in default namespace", + namespace: "default", + resourceName: "rayjob-default", + serviceName: "rayjob-default-raycluster-xxxxx-head-svc", + }, + { + name: "find service name in test namespace", + namespace: "test", + resourceName: "rayjob-test", + serviceName: "rayjob-test-raycluster-xxxxx-head-svc", + }, + { + name: "resource not found", + namespace: "default", + resourceName: "rayjob-not-found", + serviceName: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + svcName, err := client.GetRayHeadSvcName(context.Background(), tc.namespace, util.RayJob, tc.resourceName) + if tc.serviceName == "" { + assert.NotNil(t, err) + } else { + assert.Nil(t, err) + assert.Equal(t, tc.serviceName, svcName) + } + }) + } +} + +func TestGetRayHeadSvcNameByRayService(t *testing.T) { + kubeObjects := []runtime.Object{} + + rayObjects := []runtime.Object{ + &rayv1.RayService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rayservice-default", + Namespace: "default", + }, + Status: rayv1.RayServiceStatuses{ + ActiveServiceStatus: rayv1.RayServiceStatus{ + RayClusterStatus: rayv1.RayClusterStatus{ + Head: rayv1.HeadInfo{ + ServiceName: "rayservice-default-raycluster-xxxxx-head-svc", + }, + }, + }, + }, + }, + &rayv1.RayService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rayservice-test", + Namespace: "test", + }, + Status: rayv1.RayServiceStatuses{ + ActiveServiceStatus: rayv1.RayServiceStatus{ + RayClusterStatus: rayv1.RayClusterStatus{ + Head: rayv1.HeadInfo{ + ServiceName: "rayservice-test-raycluster-xxxxx-head-svc", + }, + }, + }, + }, + }, + } + + kubeClientSet := kubeFake.NewClientset(kubeObjects...) + rayClient := rayClientFake.NewSimpleClientset(rayObjects...) 
+ client := NewClientForTesting(kubeClientSet, rayClient) + + tests := []struct { + name string + namespace string + resourceName string + serviceName string + }{ + { + name: "find service name in default namespace", + namespace: "default", + resourceName: "rayservice-default", + serviceName: "rayservice-default-raycluster-xxxxx-head-svc", + }, + { + name: "find service name in test namespace", + namespace: "test", + resourceName: "rayservice-test", + serviceName: "rayservice-test-raycluster-xxxxx-head-svc", + }, + { + name: "resource not found", + namespace: "default", + resourceName: "rayservice-not-found", + serviceName: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + svcName, err := client.GetRayHeadSvcName(context.Background(), tc.namespace, util.RayService, tc.resourceName) + if tc.serviceName == "" { + assert.NotNil(t, err) + } else { + assert.Nil(t, err) + assert.Equal(t, tc.serviceName, svcName) + } + }) + } +} diff --git a/kubectl-plugin/pkg/util/completion/completion.go b/kubectl-plugin/pkg/util/completion/completion.go new file mode 100644 index 00000000000..408b075cd90 --- /dev/null +++ b/kubectl-plugin/pkg/util/completion/completion.go @@ -0,0 +1,104 @@ +package completion + +import ( + "fmt" + "strings" + + "github.com/spf13/cobra" + + cmdutil "k8s.io/kubectl/pkg/cmd/util" + "k8s.io/kubectl/pkg/util/completion" + + "github.com/ray-project/kuberay/kubectl-plugin/pkg/util" +) + +// RayResourceTypeCompletionFunc Returns a completion function that completes the Ray resource type. +// That is, raycluster, rayjob, or rayservice. 
+func RayResourceTypeCompletionFunc() func(*cobra.Command, []string, string) ([]string, cobra.ShellCompDirective) { + return func(_ *cobra.Command, _ []string, toComplete string) ([]string, cobra.ShellCompDirective) { + var comps []string + directive := cobra.ShellCompDirectiveNoFileComp + resourceTypes := getAllRayResourceType() + for _, resourceType := range resourceTypes { + if strings.HasPrefix(resourceType, toComplete) { + comps = append(comps, resourceType) + } + } + return comps, directive + } +} + +// RayClusterCompletionFunc Returns a completion function that completes RayCluster resource names. +func RayClusterCompletionFunc(f cmdutil.Factory) func(*cobra.Command, []string, string) ([]string, cobra.ShellCompDirective) { + return completion.ResourceNameCompletionFunc(f, string(util.RayCluster)) +} + +// RayJobCompletionFunc Returns a completion function that completes RayJob resource names. +func RayJobCompletionFunc(f cmdutil.Factory) func(*cobra.Command, []string, string) ([]string, cobra.ShellCompDirective) { + return completion.ResourceNameCompletionFunc(f, string(util.RayJob)) +} + +// RayServiceCompletionFunc Returns a completion function that completes RayService resource names. 
+func RayServiceCompletionFunc(f cmdutil.Factory) func(*cobra.Command, []string, string) ([]string, cobra.ShellCompDirective) { + return completion.ResourceNameCompletionFunc(f, string(util.RayService)) +} + +// RayClusterResourceNameCompletionFunc Returns completions of: +// 1- RayCluster names that match the toComplete prefix +// 2- Ray resource types which match the toComplete prefix +func RayClusterResourceNameCompletionFunc(f cmdutil.Factory) func(*cobra.Command, []string, string) ([]string, cobra.ShellCompDirective) { + return func(_ *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { + var comps []string + directive := cobra.ShellCompDirectiveNoFileComp + if len(args) == 0 { + comps, directive = doRayClusterCompletion(f, toComplete) + } + return comps, directive + } +} + +func getAllRayResourceType() []string { + return []string{ + string(util.RayCluster), + string(util.RayJob), + string(util.RayService), + } +} + +// doRayClusterCompletion Returns completions of: +// 1- RayCluster names that match the toComplete prefix +// 2- Ray resource types which match the toComplete prefix +// Ref: https://github.com/kubernetes/kubectl/blob/262825a8a665c7cae467dfaa42b63be5a5b8e5a2/pkg/util/completion/completion.go#L434 +func doRayClusterCompletion(f cmdutil.Factory, toComplete string) ([]string, cobra.ShellCompDirective) { + var comps []string + directive := cobra.ShellCompDirectiveNoFileComp + slashIdx := strings.Index(toComplete, "/") + if slashIdx == -1 { + // Standard case, complete RayCluster names + comps = completion.CompGetResource(f, string(util.RayCluster), toComplete) + + // Also include resource choices for the / form + resourceTypes := getAllRayResourceType() + + if len(comps) == 0 { + // If there are no RayCluster to complete, we will only be completing + // /. We should disable adding a space after the /. 
+ directive |= cobra.ShellCompDirectiveNoSpace + } + + for _, resource := range resourceTypes { + if strings.HasPrefix(resource, toComplete) { + comps = append(comps, fmt.Sprintf("%s/", resource)) + } + } + } else { + // Dealing with the / form, use the specified resource type + resourceType := toComplete[:slashIdx] + toComplete = toComplete[slashIdx+1:] + nameComps := completion.CompGetResource(f, resourceType, toComplete) + for _, c := range nameComps { + comps = append(comps, fmt.Sprintf("%s/%s", resourceType, c)) + } + } + return comps, directive +} diff --git a/kubectl-plugin/pkg/util/completion/completion_test.go b/kubectl-plugin/pkg/util/completion/completion_test.go new file mode 100644 index 00000000000..669905a9f79 --- /dev/null +++ b/kubectl-plugin/pkg/util/completion/completion_test.go @@ -0,0 +1,34 @@ +package completion + +import ( + "sort" + "testing" + + "github.com/spf13/cobra" +) + +func TestRayResourceTypeCompletionFunc(t *testing.T) { + compFunc := RayResourceTypeCompletionFunc() + comps, directive := compFunc(nil, []string{}, "") + checkCompletion(t, comps, []string{"raycluster", "rayjob", "rayservice"}, directive, cobra.ShellCompDirectiveNoFileComp) +} + +func checkCompletion(t *testing.T, comps, expectedComps []string, directive, expectedDirective cobra.ShellCompDirective) { + if e, d := expectedDirective, directive; e != d { + t.Errorf("expected directive\n%v\nbut got\n%v", e, d) + } + + sort.Strings(comps) + sort.Strings(expectedComps) + + if len(expectedComps) != len(comps) { + t.Fatalf("expected completions\n%v\nbut got\n%v", expectedComps, comps) + } + + for i := range comps { + if expectedComps[i] != comps[i] { + t.Errorf("expected completions\n%v\nbut got\n%v", expectedComps, comps) + break + } + } +} diff --git a/kubectl-plugin/pkg/util/generation/generation.go b/kubectl-plugin/pkg/util/generation/generation.go new file mode 100644 index 00000000000..60cd6468ba8 --- /dev/null +++ b/kubectl-plugin/pkg/util/generation/generation.go @@ 
-0,0 +1,142 @@ +package generation + +import ( + "gopkg.in/yaml.v2" + corev1 "k8s.io/api/core/v1" + corev1ac "k8s.io/client-go/applyconfigurations/core/v1" + + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/runtime" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" +) + +type RayClusterSpecObject struct { + RayVersion string + Image string + HeadCPU string + HeadMemory string + WorkerCPU string + WorkerGPU string + WorkerMemory string + WorkerReplicas int32 +} + +type RayClusterYamlObject struct { + ClusterName string + Namespace string + RayClusterSpecObject +} + +type RayJobYamlObject struct { + RayJobName string + Namespace string + SubmissionMode string + RayClusterSpecObject +} + +func (rayClusterObject *RayClusterYamlObject) GenerateRayClusterApplyConfig() *rayv1ac.RayClusterApplyConfiguration { + rayClusterApplyConfig := rayv1ac.RayCluster(rayClusterObject.ClusterName, rayClusterObject.Namespace). + WithSpec(rayClusterObject.generateRayClusterSpec()) + + return rayClusterApplyConfig +} + +func (rayJobObject *RayJobYamlObject) GenerateRayJobApplyConfig() *rayv1ac.RayJobApplyConfiguration { + rayJobApplyConfig := rayv1ac.RayJob(rayJobObject.RayJobName, rayJobObject.Namespace). + WithSpec(rayv1ac.RayJobSpec(). + WithSubmissionMode(rayv1.JobSubmissionMode(rayJobObject.SubmissionMode)). + WithRayClusterSpec(rayJobObject.generateRayClusterSpec())) + + return rayJobApplyConfig +} + +func (rayClusterSpecObject *RayClusterSpecObject) generateRayClusterSpec() *rayv1ac.RayClusterSpecApplyConfiguration { + // TODO: Look for better workaround/fixes for RayStartParams. Currently using `WithRayStartParams()` requires + // a non-empty map with valid key value pairs and will not populate the field with empty/nil values. This + // isn't ideal as it forces the generated RayCluster yamls to use those parameters. 
+ rayClusterSpec := rayv1ac.RayClusterSpec(). + WithRayVersion(rayClusterSpecObject.RayVersion). + WithHeadGroupSpec(rayv1ac.HeadGroupSpec(). + WithRayStartParams(map[string]string{"dashboard-host": "0.0.0.0"}). + WithTemplate(corev1ac.PodTemplateSpec(). + WithSpec(corev1ac.PodSpec(). + WithContainers(corev1ac.Container(). + WithName("ray-head"). + WithImage(rayClusterSpecObject.Image). + WithResources(corev1ac.ResourceRequirements(). + WithRequests(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(rayClusterSpecObject.HeadCPU), + corev1.ResourceMemory: resource.MustParse(rayClusterSpecObject.HeadMemory), + }). + WithLimits(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(rayClusterSpecObject.HeadCPU), + corev1.ResourceMemory: resource.MustParse(rayClusterSpecObject.HeadMemory), + })). + WithPorts(corev1ac.ContainerPort().WithContainerPort(6379).WithName("gcs-server"), + corev1ac.ContainerPort().WithContainerPort(8265).WithName("dashboard"), + corev1ac.ContainerPort().WithContainerPort(10001).WithName("client")))))). + WithWorkerGroupSpecs(rayv1ac.WorkerGroupSpec(). + WithRayStartParams(map[string]string{"metrics-export-port": "8080"}). + WithGroupName("default-group"). + WithReplicas(rayClusterSpecObject.WorkerReplicas). + WithTemplate(corev1ac.PodTemplateSpec(). + WithSpec(corev1ac.PodSpec(). + WithContainers(corev1ac.Container(). + WithName("ray-worker"). + WithImage(rayClusterSpecObject.Image). + WithResources(corev1ac.ResourceRequirements(). + WithRequests(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(rayClusterSpecObject.WorkerCPU), + corev1.ResourceMemory: resource.MustParse(rayClusterSpecObject.WorkerMemory), + }). 
+ WithLimits(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(rayClusterSpecObject.WorkerCPU), + corev1.ResourceMemory: resource.MustParse(rayClusterSpecObject.WorkerMemory), + })))))) + + gpuResource := resource.MustParse(rayClusterSpecObject.WorkerGPU) + if !gpuResource.IsZero() { + var requests, limits corev1.ResourceList + requests = *rayClusterSpec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests + limits = *rayClusterSpec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Limits + requests[corev1.ResourceName("nvidia.com/gpu")] = gpuResource + limits[corev1.ResourceName("nvidia.com/gpu")] = gpuResource + + rayClusterSpec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests = &requests + rayClusterSpec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Limits = &limits + } + + return rayClusterSpec +} + +// Converts RayClusterApplyConfiguration object into a yaml string +func ConvertRayClusterApplyConfigToYaml(rayClusterac *rayv1ac.RayClusterApplyConfiguration) (string, error) { + resource, err := runtime.DefaultUnstructuredConverter.ToUnstructured(rayClusterac) + if err != nil { + return "", err + } + + podByte, err := yaml.Marshal(resource) + if err != nil { + return "", err + } + + return string(podByte), nil +} + +// Converts RayJobApplyConfiguration object into a yaml string +func ConvertRayJobApplyConfigToYaml(rayJobac *rayv1ac.RayJobApplyConfiguration) (string, error) { + resource, err := runtime.DefaultUnstructuredConverter.ToUnstructured(rayJobac) + if err != nil { + return "", err + } + + podByte, err := yaml.Marshal(resource) + if err != nil { + return "", err + } + + return string(podByte), nil +} diff --git a/kubectl-plugin/pkg/util/generation/generation_test.go b/kubectl-plugin/pkg/util/generation/generation_test.go new file mode 100644 index 00000000000..7e1f9b38fab --- /dev/null +++ b/kubectl-plugin/pkg/util/generation/generation_test.go @@ -0,0 +1,145 @@ +package generation + +import ( 
+ "strings" + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +func TestGenerateRayCluterApplyConfig(t *testing.T) { + testRayClusterYamlObject := RayClusterYamlObject{ + ClusterName: "test-ray-cluster", + Namespace: "default", + RayClusterSpecObject: RayClusterSpecObject{ + RayVersion: "2.39.0", + Image: "rayproject/ray:2.39.0", + HeadCPU: "1", + HeadMemory: "5Gi", + WorkerReplicas: 3, + WorkerCPU: "2", + WorkerMemory: "10Gi", + WorkerGPU: "1", + }, + } + + result := testRayClusterYamlObject.GenerateRayClusterApplyConfig() + + assert.Equal(t, testRayClusterYamlObject.ClusterName, *result.Name) + assert.Equal(t, testRayClusterYamlObject.Namespace, *result.Namespace) + assert.Equal(t, testRayClusterYamlObject.RayVersion, *result.Spec.RayVersion) + assert.Equal(t, testRayClusterYamlObject.Image, *result.Spec.HeadGroupSpec.Template.Spec.Containers[0].Image) + assert.Equal(t, resource.MustParse(testRayClusterYamlObject.HeadCPU), *result.Spec.HeadGroupSpec.Template.Spec.Containers[0].Resources.Requests.Cpu()) + assert.Equal(t, resource.MustParse(testRayClusterYamlObject.HeadMemory), *result.Spec.HeadGroupSpec.Template.Spec.Containers[0].Resources.Requests.Memory()) + assert.Equal(t, "default-group", *result.Spec.WorkerGroupSpecs[0].GroupName) + assert.Equal(t, testRayClusterYamlObject.WorkerReplicas, *result.Spec.WorkerGroupSpecs[0].Replicas) + assert.Equal(t, resource.MustParse(testRayClusterYamlObject.WorkerCPU), *result.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests.Cpu()) + assert.Equal(t, resource.MustParse(testRayClusterYamlObject.WorkerGPU), *result.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests.Name(corev1.ResourceName("nvidia.com/gpu"), resource.DecimalSI)) + assert.Equal(t, resource.MustParse(testRayClusterYamlObject.WorkerMemory), 
*result.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests.Memory()) +} + +func TestGenerateRayJobApplyConfig(t *testing.T) { + testRayJobYamlObject := RayJobYamlObject{ + RayJobName: "test-ray-job", + Namespace: "default", + SubmissionMode: "InteractiveMode", + RayClusterSpecObject: RayClusterSpecObject{ + RayVersion: "2.39.0", + Image: "rayproject/ray:2.39.0", + HeadCPU: "1", + HeadMemory: "5Gi", + WorkerReplicas: 3, + WorkerCPU: "2", + WorkerMemory: "10Gi", + WorkerGPU: "0", + }, + } + + result := testRayJobYamlObject.GenerateRayJobApplyConfig() + + assert.Equal(t, testRayJobYamlObject.RayJobName, *result.Name) + assert.Equal(t, testRayJobYamlObject.Namespace, *result.Namespace) + assert.Equal(t, rayv1.JobSubmissionMode(testRayJobYamlObject.SubmissionMode), *result.Spec.SubmissionMode) + assert.Equal(t, testRayJobYamlObject.RayVersion, *result.Spec.RayClusterSpec.RayVersion) + assert.Equal(t, testRayJobYamlObject.Image, *result.Spec.RayClusterSpec.HeadGroupSpec.Template.Spec.Containers[0].Image) + assert.Equal(t, resource.MustParse(testRayJobYamlObject.HeadCPU), *result.Spec.RayClusterSpec.HeadGroupSpec.Template.Spec.Containers[0].Resources.Requests.Cpu()) + assert.Equal(t, resource.MustParse(testRayJobYamlObject.HeadMemory), *result.Spec.RayClusterSpec.HeadGroupSpec.Template.Spec.Containers[0].Resources.Requests.Memory()) + assert.Equal(t, "default-group", *result.Spec.RayClusterSpec.WorkerGroupSpecs[0].GroupName) + assert.Equal(t, testRayJobYamlObject.WorkerReplicas, *result.Spec.RayClusterSpec.WorkerGroupSpecs[0].Replicas) + assert.Equal(t, resource.MustParse(testRayJobYamlObject.WorkerCPU), *result.Spec.RayClusterSpec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests.Cpu()) + assert.Equal(t, resource.MustParse(testRayJobYamlObject.WorkerMemory), *result.Spec.RayClusterSpec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests.Memory()) +} + +func TestConvertRayClusterApplyConfigToYaml(t *testing.T) { + 
testRayClusterYamlObject := RayClusterYamlObject{ + ClusterName: "test-ray-cluster", + Namespace: "default", + RayClusterSpecObject: RayClusterSpecObject{ + RayVersion: "2.39.0", + Image: "rayproject/ray:2.39.0", + HeadCPU: "1", + HeadMemory: "5Gi", + WorkerReplicas: 3, + WorkerCPU: "2", + WorkerMemory: "10Gi", + WorkerGPU: "0", + }, + } + + result := testRayClusterYamlObject.GenerateRayClusterApplyConfig() + + resultString, err := ConvertRayClusterApplyConfigToYaml(result) + assert.Nil(t, err) + expectedResultYaml := `apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: test-ray-cluster + namespace: default +spec: + headGroupSpec: + rayStartParams: + dashboard-host: 0.0.0.0 + template: + spec: + containers: + - image: rayproject/ray:2.39.0 + name: ray-head + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + memory: 5Gi + requests: + cpu: "1" + memory: 5Gi + rayVersion: 2.39.0 + workerGroupSpecs: + - groupName: default-group + rayStartParams: + metrics-export-port: "8080" + replicas: 3 + template: + spec: + containers: + - image: rayproject/ray:2.39.0 + name: ray-worker + resources: + limits: + cpu: "2" + memory: 10Gi + requests: + cpu: "2" + memory: 10Gi` + + assert.Equal(t, expectedResultYaml, strings.TrimSpace(resultString)) +} diff --git a/kubectl-plugin/pkg/util/types.go b/kubectl-plugin/pkg/util/types.go new file mode 100644 index 00000000000..833417f3f6f --- /dev/null +++ b/kubectl-plugin/pkg/util/types.go @@ -0,0 +1,9 @@ +package util + +type ResourceType string + +const ( + RayCluster ResourceType = "raycluster" + RayJob ResourceType = "rayjob" + RayService ResourceType = "rayservice" +) diff --git a/kubectl-plugin/test/e2e/kubectl_ray_cluster_get_test.go b/kubectl-plugin/test/e2e/kubectl_ray_cluster_get_test.go new file mode 100644 index 00000000000..cc3aca9aef5 --- /dev/null +++ 
b/kubectl-plugin/test/e2e/kubectl_ray_cluster_get_test.go @@ -0,0 +1,73 @@ +package e2e + +import ( + "bytes" + "os/exec" + "strings" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/cli-runtime/pkg/printers" +) + +var _ = Describe("Calling ray plugin `get` command", func() { + var namespace string + + BeforeEach(func() { + namespace = createTestNamespace() + deployTestRayCluster(namespace) + DeferCleanup(func() { + deleteTestNamespace(namespace) + namespace = "" + }) + }) + + It("succeed in getting ray cluster information", func() { + cmd := exec.Command("kubectl", "ray", "get", "cluster", "--namespace", namespace) + output, err := cmd.CombinedOutput() + + expectedOutputTablePrinter := printers.NewTablePrinter(printers.PrintOptions{}) + expectedTestResultTable := &v1.Table{ + ColumnDefinitions: []v1.TableColumnDefinition{ + {Name: "Name", Type: "string"}, + {Name: "Namespace", Type: "string"}, + {Name: "Desired Workers", Type: "string"}, + {Name: "Available Workers", Type: "string"}, + {Name: "CPUs", Type: "string"}, + {Name: "GPUs", Type: "string"}, + {Name: "TPUs", Type: "string"}, + {Name: "Memory", Type: "string"}, + {Name: "Age", Type: "string"}, + }, + } + + expectedTestResultTable.Rows = append(expectedTestResultTable.Rows, v1.TableRow{ + Cells: []interface{}{ + "raycluster-kuberay", + namespace, + "1", + "1", + "2", + "0", + "0", + "3G", + }, + }) + + var resbuffer bytes.Buffer + bufferr := expectedOutputTablePrinter.PrintObj(expectedTestResultTable, &resbuffer) + Expect(bufferr).NotTo(HaveOccurred()) + + Expect(err).NotTo(HaveOccurred()) + Expect(strings.TrimSpace(string(output))).To(ContainSubstring(strings.TrimSpace(resbuffer.String()))) + }) + + It("should not succeed", func() { + cmd := exec.Command("kubectl", "ray", "get", "cluster", "--namespace", namespace, "fakeclustername", "anotherfakeclustername") + output, err := cmd.CombinedOutput() + + Expect(err).To(HaveOccurred()) + 
Expect(output).ToNot(ContainElements("fakeclustername")) + }) +}) diff --git a/kubectl-plugin/test/e2e/kubectl_ray_e2e_suite_test.go b/kubectl-plugin/test/e2e/kubectl_ray_e2e_suite_test.go new file mode 100644 index 00000000000..cf96ca75996 --- /dev/null +++ b/kubectl-plugin/test/e2e/kubectl_ray_e2e_suite_test.go @@ -0,0 +1,13 @@ +package e2e + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestKubectlRayCommand(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Kubectl Ray e2e Test Suite") +} diff --git a/kubectl-plugin/test/e2e/kubectl_ray_job_submit_test.go b/kubectl-plugin/test/e2e/kubectl_ray_job_submit_test.go new file mode 100644 index 00000000000..da55c468d79 --- /dev/null +++ b/kubectl-plugin/test/e2e/kubectl_ray_job_submit_test.go @@ -0,0 +1,103 @@ +package e2e + +import ( + "os/exec" + "path" + "regexp" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// Directory when running test is kuberay/kubectl-plugin/test/e2e/ +const ( + rayJobFilePath = "./testdata/ray-job.interactive-mode.yaml" + rayJobNoEnvFilePath = "./testdata/ray-job.interactive-mode-no-runtime-env.yaml" + kubectlRayJobWorkingDir = "./testdata/rayjob-submit-working-dir/" + entrypointSampleFileName = "entrypoint-python-sample.py" + runtimeEnvSampleFileName = "runtime-env-sample.yaml" +) + +var _ = Describe("Calling ray plugin `job submit` command on Ray Job", func() { + var namespace string + + BeforeEach(func() { + namespace = createTestNamespace() + deployTestRayCluster(namespace) + DeferCleanup(func() { + deleteTestNamespace(namespace) + namespace = "" + }) + }) + + It("succeed in submitting RayJob", func() { + cmd := exec.Command("kubectl", "ray", "job", "submit", "--namespace", namespace, "-f", rayJobFilePath, "--working-dir", kubectlRayJobWorkingDir, "--", "python", entrypointSampleFileName) + output, err := cmd.CombinedOutput() + + Expect(err).NotTo(HaveOccurred()) + // Retrieve the Job ID from the output + 
regexExp := regexp.MustCompile(`'([^']*raysubmit[^']*)'`) + matches := regexExp.FindStringSubmatch(string(output)) + + Expect(len(matches)).To(BeNumerically(">=", 2)) + cmdOutputJobID := matches[1] + + // Use kubectl to check status of the rayjob + // Retrieve Job ID + cmd = exec.Command("kubectl", "get", "--namespace", namespace, "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobId}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(cmdOutputJobID).To(Equal(string(output))) + + // Retrieve Job Status + cmd = exec.Command("kubectl", "get", "--namespace", namespace, "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobStatus}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(string(output)).To(Equal("SUCCEEDED")) + + // Retrieve Job Deployment Status + cmd = exec.Command("kubectl", "get", "--namespace", namespace, "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobDeploymentStatus}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(string(output)).To(Equal("Complete")) + }) + + It("succeed in submitting RayJob with runtime environment set with working dir", func() { + runtimeEnvFilePath := path.Join(kubectlRayJobWorkingDir, runtimeEnvSampleFileName) + cmd := exec.Command("kubectl", "ray", "job", "submit", "--namespace", namespace, "-f", rayJobNoEnvFilePath, "--runtime-env", runtimeEnvFilePath, "--", "python", entrypointSampleFileName) + output, err := cmd.CombinedOutput() + + Expect(err).NotTo(HaveOccurred()) + // Retrieve the Job ID from the output + regexExp := regexp.MustCompile(`'([^']*raysubmit[^']*)'`) + matches := regexExp.FindStringSubmatch(string(output)) + + Expect(len(matches)).To(BeNumerically(">=", 2)) + cmdOutputJobID := matches[1] + + // Use kubectl to check status of the rayjob + // Retrieve Job ID + cmd = exec.Command("kubectl", "get", "--namespace", namespace, "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobId}") + output, err 
= cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(cmdOutputJobID).To(Equal(string(output))) + + // Retrieve Job Status + cmd = exec.Command("kubectl", "get", "--namespace", namespace, "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobStatus}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(string(output)).To(Equal("SUCCEEDED")) + + // Retrieve Job Deployment Status + cmd = exec.Command("kubectl", "get", "--namespace", namespace, "rayjob", "rayjob-sample", "-o", "jsonpath={.status.jobDeploymentStatus}") + output, err = cmd.CombinedOutput() + Expect(err).ToNot(HaveOccurred()) + + Expect(string(output)).To(Equal("Complete")) + }) +}) diff --git a/kubectl-plugin/test/e2e/kubectl_ray_log_test.go b/kubectl-plugin/test/e2e/kubectl_ray_log_test.go new file mode 100644 index 00000000000..2f059a88c0e --- /dev/null +++ b/kubectl-plugin/test/e2e/kubectl_ray_log_test.go @@ -0,0 +1,230 @@ +package e2e + +import ( + "os" + "os/exec" + "path" + "strings" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var requiredFileSet = map[string]string{ + "stdout.log": "Ray runtime started", + "raylet.out": "Ray Event initialized for RAYLET", +} + +var _ = Describe("Calling ray plugin `log` command on Ray Cluster", func() { + var namespace string + + BeforeEach(func() { + namespace = createTestNamespace() + deployTestRayCluster(namespace) + DeferCleanup(func() { + deleteTestNamespace(namespace) + namespace = "" + }) + }) + + It("succeed in retrieving all ray cluster logs", func() { + expectedDirPath := "./raycluster-kuberay" + expectedOutputStringFormat := `No output directory specified, creating dir under current directory using resource name\.\nCommand set to retrieve both head and worker node logs\.\nDownloading log for Ray Node raycluster-kuberay-head-\w+\nDownloading log for Ray Node raycluster-kuberay-workergroup-worker-\w+` + + cmd := exec.Command("kubectl", "ray", "log", "--namespace", namespace, "raycluster-kuberay", "--node-type", "all") + output, err := cmd.CombinedOutput() + + Expect(err).NotTo(HaveOccurred()) + Expect(strings.TrimSpace(string(output))).Should(MatchRegexp(expectedOutputStringFormat)) + + // Check that the log directory exists + logDirInfo, err := os.Stat(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + Expect(logDirInfo.IsDir()).To(BeTrue()) + + // Check the contents of the cluster directory + fileList, err := os.ReadDir(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + Expect(fileList).To(HaveLen(2)) + + for _, file := range fileList { + Expect(file.IsDir()).To(BeTrue()) + + // Check that the files exist and have correct content + logList, err := os.ReadDir(path.Join(expectedDirPath, file.Name())) + Expect(err).NotTo(HaveOccurred()) + + currentRequiredFileList := make(map[string]string) + + for key, value := range requiredFileSet { + currentRequiredFileList[key] = value + } + + for _, logFile := range logList { + if checkContent := currentRequiredFileList[logFile.Name()]; checkContent != "" { + 
delete(currentRequiredFileList, logFile.Name()) + + // read and check file content + fileContentByte, err := os.ReadFile(path.Join(expectedDirPath, file.Name(), logFile.Name())) + Expect(err).NotTo(HaveOccurred()) + + fileContent := string(fileContentByte) + + Expect(fileContent).To(ContainSubstring(checkContent)) + } + } + + Expect(currentRequiredFileList).To(BeEmpty()) + } + + // Cleanup + err = os.RemoveAll(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + }) + + It("succeed in retrieving ray cluster head logs", func() { + expectedDirPath := "./raycluster-kuberay" + expectedOutputStringFormat := `No output directory specified, creating dir under current directory using resource name\.\nCommand set to retrieve only head node logs\.\nDownloading log for Ray Node raycluster-kuberay-head-\w+` + + cmd := exec.Command("kubectl", "ray", "log", "--namespace", namespace, "raycluster-kuberay", "--node-type", "head") + output, err := cmd.CombinedOutput() + Expect(err).NotTo(HaveOccurred()) + Expect(strings.TrimSpace(string(output))).Should(MatchRegexp(expectedOutputStringFormat)) + + logDirInfo, err := os.Stat(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + Expect(logDirInfo.IsDir()).To(BeTrue()) + + // Check the contents of the cluster directory + fileList, err := os.ReadDir(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + Expect(fileList).To(HaveLen(1)) + + for _, file := range fileList { + Expect(file.IsDir()).To(BeTrue()) + + // Check that the files exist and have correct content + logList, err := os.ReadDir(path.Join(expectedDirPath, file.Name())) + Expect(err).NotTo(HaveOccurred()) + + currentRequiredFileList := make(map[string]string) + + for key, value := range requiredFileSet { + currentRequiredFileList[key] = value + } + + for _, logFile := range logList { + if checkContent := currentRequiredFileList[logFile.Name()]; checkContent != "" { + delete(currentRequiredFileList, logFile.Name()) + + // read and check file content + fileContentByte, err 
:= os.ReadFile(path.Join(expectedDirPath, file.Name(), logFile.Name())) + Expect(err).NotTo(HaveOccurred()) + + fileContent := string(fileContentByte) + + Expect(fileContent).To(ContainSubstring(checkContent)) + } + } + + Expect(currentRequiredFileList).To(BeEmpty()) + } + + // Cleanup + err = os.RemoveAll(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + }) + + It("succeed in retrieving ray cluster worker logs", func() { + expectedDirPath := "./raycluster-kuberay" + expectedOutputStringFormat := `No output directory specified, creating dir under current directory using resource name\.\nCommand set to retrieve only worker node logs\.\nDownloading log for Ray Node raycluster-kuberay-workergroup-worker-\w+` + + cmd := exec.Command("kubectl", "ray", "log", "--namespace", namespace, "raycluster-kuberay", "--node-type", "worker") + output, err := cmd.CombinedOutput() + + Expect(err).NotTo(HaveOccurred()) + Expect(strings.TrimSpace(string(output))).Should(MatchRegexp(expectedOutputStringFormat)) + + logDirInfo, err := os.Stat(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + Expect(logDirInfo.IsDir()).To(BeTrue()) + + // Check the contents of the cluster directory + fileList, err := os.ReadDir(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + Expect(fileList).To(HaveLen(1)) + + for _, file := range fileList { + Expect(file.IsDir()).To(BeTrue()) + + // Check that the files exist and have correct content + logList, err := os.ReadDir(path.Join(expectedDirPath, file.Name())) + Expect(err).NotTo(HaveOccurred()) + + currentRequiredFileList := make(map[string]string) + + for key, value := range requiredFileSet { + currentRequiredFileList[key] = value + } + + for _, logFile := range logList { + if checkContent := currentRequiredFileList[logFile.Name()]; checkContent != "" { + delete(currentRequiredFileList, logFile.Name()) + + // read and check file content + fileContentByte, err := os.ReadFile(path.Join(expectedDirPath, file.Name(), logFile.Name())) + 
Expect(err).NotTo(HaveOccurred()) + + fileContent := string(fileContentByte) + + Expect(fileContent).To(ContainSubstring(checkContent)) + } + } + + Expect(currentRequiredFileList).To(BeEmpty()) + } + + // Cleanup + err = os.RemoveAll(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + }) + + It("succeed in retrieving ray cluster logs within designated directory", func() { + expectedDirPath := "./temporary-directory" + expectedOutputStringFormat := `Command set to retrieve both head and worker node logs\.\nDownloading log for Ray Node raycluster-kuberay-head-\w+\nDownloading log for Ray Node raycluster-kuberay-workergroup-worker-\w+` + + err := os.MkdirAll(expectedDirPath, 0o755) + Expect(err).NotTo(HaveOccurred()) + + cmd := exec.Command("kubectl", "ray", "log", "--namespace", namespace, "raycluster-kuberay", "--node-type", "all", "--out-dir", expectedDirPath) + output, err := cmd.CombinedOutput() + + Expect(err).NotTo(HaveOccurred()) + Expect(strings.TrimSpace(string(output))).Should(MatchRegexp(expectedOutputStringFormat)) + + // Check the contents of the cluster directory + fileList, err := os.ReadDir(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + Expect(fileList).To(HaveLen(2)) + + // Cleanup + err = os.RemoveAll(expectedDirPath) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should not succeed with non-existent cluster", func() { + cmd := exec.Command("kubectl", "ray", "log", "--namespace", namespace, "fakeclustername") + output, err := cmd.CombinedOutput() + + Expect(err).To(HaveOccurred()) + Expect(strings.TrimSpace(string(output))).To(ContainSubstring("No ray nodes found for resource fakecluster")) + }) + + It("should not succeed with non-existent directory set", func() { + cmd := exec.Command("kubectl", "ray", "log", "--namespace", namespace, "raycluster-kuberay", "--out-dir", "./fake-directory") + output, err := cmd.CombinedOutput() + + Expect(err).To(HaveOccurred()) + Expect(strings.TrimSpace(string(output))).To(ContainSubstring("Directory 
does not exist.")) + }) +}) diff --git a/kubectl-plugin/test/e2e/kubectl_ray_session_test.go b/kubectl-plugin/test/e2e/kubectl_ray_session_test.go new file mode 100644 index 00000000000..695dc2ba450 --- /dev/null +++ b/kubectl-plugin/test/e2e/kubectl_ray_session_test.go @@ -0,0 +1,128 @@ +package e2e + +import ( + "context" + "errors" + "os" + "os/exec" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("Calling ray plugin `session` command", func() { + var namespace string + + BeforeEach(func() { + namespace = createTestNamespace() + deployTestRayCluster(namespace) + DeferCleanup(func() { + deleteTestNamespace(namespace) + namespace = "" + }) + }) + + It("succeed in forwarding RayCluster and should be able to cancel", func() { + cmd := exec.Command("kubectl", "ray", "session", "--namespace", namespace, "raycluster-kuberay") + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + err := cmd.Start() + Expect(err).NotTo(HaveOccurred()) + + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + + // Send a request to localhost:8265, it should succeed + Eventually(func() error { + _, err := exec.Command("curl", "http://localhost:8265").CombinedOutput() + return err + }, 3*time.Second, 500*time.Millisecond).ShouldNot(HaveOccurred()) + + // Send a signal to cancel the command + err = cmd.Process.Signal(os.Interrupt) + Expect(err).NotTo(HaveOccurred()) + + select { + case <-ctx.Done(): + // Timeout, kill the process + Expect(ctx.Err()).To(Equal(context.DeadlineExceeded)) + err = cmd.Process.Kill() + Expect(err).NotTo(HaveOccurred()) + Fail("kubectl ray session command did not finish in time") + case err = <-done: + // It should not have error, or ExitError due to interrupt + if err != nil { + exitErr := &exec.ExitError{} + Expect(errors.As(err, &exitErr)).To(BeTrue()) + Expect(exitErr.String()).To(Equal("signal: interrupt")) + } + } + }) + + It("should reconnect after pod 
connection is lost", func() { + Skip("Skip this because it is flaky now") + sessionCmd := exec.Command("kubectl", "ray", "session", "--namespace", namespace, "raycluster-kuberay") + + err := sessionCmd.Start() + Expect(err).NotTo(HaveOccurred()) + + // Send a request to localhost:8265, it should succeed + Eventually(func() error { + _, err := exec.Command("curl", "http://localhost:8265").CombinedOutput() + return err + }, 3*time.Second, 500*time.Millisecond).ShouldNot(HaveOccurred()) + + // Get the current head pod name + cmd := exec.Command("kubectl", "get", "--namespace", namespace, "raycluster/raycluster-kuberay", "-o", "jsonpath={.status.head.podName}") + output, err := cmd.CombinedOutput() + Expect(err).NotTo(HaveOccurred()) + oldPodName := string(output) + var newPodName string + + // Delete the pod + cmd = exec.Command("kubectl", "delete", "--namespace", namespace, "pod", oldPodName) + err = cmd.Run() + Expect(err).NotTo(HaveOccurred()) + + // Wait until the RayCluster status reports a non-empty head pod name that differs from the deleted pod + Eventually(func() error { + cmd := exec.Command("kubectl", "get", "--namespace", namespace, "raycluster/raycluster-kuberay", "-o", "jsonpath={.status.head.podName}") + output, err := cmd.CombinedOutput() + newPodName = string(output) + if err != nil { + return err + } + if newPodName == "" || newPodName == oldPodName { + return errors.New("head pod has not been replaced yet") // err is nil on this path; returning it would end the wait before a new pod exists + } + return nil + }, 60*time.Second, 1*time.Second).ShouldNot(HaveOccurred()) + + // Wait for the new pod to be ready + cmd = exec.Command("kubectl", "wait", "--namespace", namespace, "pod", newPodName, "--for=condition=Ready", "--timeout=60s") + err = cmd.Run() + Expect(err).NotTo(HaveOccurred()) + + // Send a request to localhost:8265 while the session command is still running, it should succeed again + Eventually(func() error { + _, err := exec.Command("curl", "http://localhost:8265").CombinedOutput() + return err + }, 60*time.Second, 1*time.Second).ShouldNot(HaveOccurred()) + + err = sessionCmd.Process.Kill() + Expect(err).NotTo(HaveOccurred()) + _ = sessionCmd.Wait() + }) + + It("should not succeed", func()
{ + cmd := exec.Command("kubectl", "ray", "session", "--namespace", namespace, "fakeclustername") + output, err := cmd.CombinedOutput() + + Expect(err).To(HaveOccurred()) + Expect(output).ToNot(ContainElements("fakeclustername")) // NOTE(review): output is a []byte, so ContainElements appears to compare single bytes with this string and can never match; confirm the intended check (e.g. ContainSubstring on string(output)) + }) +}) diff --git a/kubectl-plugin/test/e2e/support.go b/kubectl-plugin/test/e2e/support.go new file mode 100644 index 00000000000..854c95508e8 --- /dev/null +++ b/kubectl-plugin/test/e2e/support.go @@ -0,0 +1,52 @@ +package e2e + +import ( + "math/rand" + "os/exec" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789" + +func randStringBytes(n int) string { + // Reference: https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-go/22892986 + b := make([]byte, n) + for i := range b { + b[i] = letterBytes[rand.Intn(len(letterBytes))] //nolint:gosec // Don't need cryptographically secure random number + } + return string(b) +} + +func createTestNamespace() string { + GinkgoHelper() + suffix := randStringBytes(5) + ns := "test-ns-" + suffix + cmd := exec.Command("kubectl", "create", "namespace", ns) + err := cmd.Run() + Expect(err).NotTo(HaveOccurred()) + nsWithPrefix := "namespace/" + ns + cmd = exec.Command("kubectl", "wait", "--timeout=20s", "--for", "jsonpath={.status.phase}=Active", nsWithPrefix) + err = cmd.Run() + Expect(err).NotTo(HaveOccurred()) + return ns +} + +func deleteTestNamespace(ns string) { + GinkgoHelper() + cmd := exec.Command("kubectl", "delete", "namespace", ns) + err := cmd.Run() + Expect(err).NotTo(HaveOccurred()) +} + +func deployTestRayCluster(ns string) { + GinkgoHelper() + // Apply the sample RayCluster manifest into ns, then wait for the cluster state to become ready + cmd := exec.Command("kubectl", "apply", "-f", "../../../ray-operator/config/samples/ray-cluster.sample.yaml", "-n", ns) + err := cmd.Run() + Expect(err).NotTo(HaveOccurred()) + cmd = exec.Command("kubectl", "wait", "--timeout=300s", "--for", "jsonpath={.status.state}=ready",
"raycluster/raycluster-kuberay", "-n", ns) + err = cmd.Run() + Expect(err).NotTo(HaveOccurred()) +} diff --git a/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode-no-runtime-env.yaml b/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode-no-runtime-env.yaml new file mode 100644 index 00000000000..d883da72a92 --- /dev/null +++ b/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode-no-runtime-env.yaml @@ -0,0 +1,48 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: rayjob-sample +spec: + submissionMode: 'InteractiveMode' + rayClusterSpec: + rayVersion: '2.39.0' + headGroupSpec: + rayStartParams: + dashboard-host: '0.0.0.0' + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.39.0 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + requests: + cpu: "200m" + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 5 + groupName: small-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.39.0 + lifecycle: + preStop: + exec: + command: [ "/bin/sh","-c","ray stop" ] + resources: + limits: + cpu: "1" + requests: + cpu: "200m" diff --git a/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode.yaml b/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode.yaml new file mode 100644 index 00000000000..7808ebad327 --- /dev/null +++ b/kubectl-plugin/test/e2e/testdata/ray-job.interactive-mode.yaml @@ -0,0 +1,57 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: rayjob-sample +spec: + # The current value is "InteractiveMode", meaning that it will wait for user to submit job and provide the job submission ID + submissionMode: 'InteractiveMode' + runtimeEnvYAML: | + pip: + - emoji==2.14.0 + - pyjokes==0.6.0 + env_vars: + test_env_var: "first_env_var" + another_env_var: "second_env_var" + + rayClusterSpec: + rayVersion: '2.39.0' # should 
match the Ray version in the image of the containers + headGroupSpec: + rayStartParams: + dashboard-host: '0.0.0.0' + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.39.0 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + requests: + cpu: "200m" + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 5 + groupName: small-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.39.0 + lifecycle: + preStop: + exec: + command: [ "/bin/sh","-c","ray stop" ] + resources: + limits: + cpu: "1" + requests: + cpu: "200m" diff --git a/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/entrypoint-python-sample.py b/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/entrypoint-python-sample.py new file mode 100644 index 00000000000..18b6de6bae6 --- /dev/null +++ b/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/entrypoint-python-sample.py @@ -0,0 +1,19 @@ +import ray +import os +import emoji +import pyjokes + +ray.init() + +@ray.remote +def f(): + assert emoji.__version__ == "2.14.0" + assert pyjokes.__version__ == "0.6.0" + + first_env_var = os.getenv("test_env_var") + second_env_var = os.getenv("another_env_var") + + assert first_env_var == "first_env_var" + assert second_env_var == "second_env_var" + +ray.get(f.remote()) diff --git a/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/runtime-env-sample.yaml b/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/runtime-env-sample.yaml new file mode 100644 index 00000000000..651db18d969 --- /dev/null +++ b/kubectl-plugin/test/e2e/testdata/rayjob-submit-working-dir/runtime-env-sample.yaml @@ -0,0 +1,7 @@ +pip: + - emoji==2.14.0 + - pyjokes==0.6.0 +env_vars: + test_env_var: "first_env_var" + another_env_var: "second_env_var" +working_dir: ./testdata/rayjob-submit-working-dir/ 
diff --git a/kuberay.code-workspace b/kuberay.code-workspace index dfbb635ab71..e25bd09991f 100644 --- a/kuberay.code-workspace +++ b/kuberay.code-workspace @@ -10,11 +10,11 @@ "path": "apiserver" }, { - "path": "cli" + "path": "kubectl-plugin" }, { "path": "proto" } ], "settings": {} -} \ No newline at end of file +} diff --git a/mkdocs.yml b/mkdocs.yml index 7d4c285261c..b9807da43f1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,7 +29,8 @@ nav: - Components: - KubeRay Operator: components/operator.md - KubeRay API Server: components/apiserver.md - - KubeRay CLI: components/cli.md + - KubeRay Python Client: components/pythonclient.md + - KubeRay Python API Client: components/pythonapiclient.md - Features: - RayService: guidance/rayservice.md - RayJob: guidance/rayjob.md diff --git a/proto/Dockerfile b/proto/Dockerfile index c1022b73e91..dd8a3b788aa 100644 --- a/proto/Dockerfile +++ b/proto/Dockerfile @@ -1,5 +1,5 @@ # Generate client code (go & json) from API protocol buffers -FROM registry.access.redhat.com/ubi9/go-toolset:1.20.10 as generator +FROM golang:1.22.4-bullseye as generator ENV PROTOC_VERSION 3.17.3 ENV GOLANG_PROTOBUF_VERSION v1.5.2 diff --git a/proto/README.md b/proto/README.md index ba5577a6aa8..7da217eeabd 100644 --- a/proto/README.md +++ b/proto/README.md @@ -38,7 +38,7 @@ Use the tools [bootprint-openapi](https://github.com/bootprint/bootprint-monorep Third-party proto dependencies are synchronized back to `proto/third_party` for easier development (IDE friendly). Ideally, the directory for searching imports should be specified instead. ```bash -protoc -I. +protoc -I. 
-I/go/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis \ -I/go/src/github.com/grpc-ecosystem/grpc-gateway/ \ -I/go/src/github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger/options/ \ diff --git a/proto/cluster.proto b/proto/cluster.proto index 411014203c7..fba86a5fd66 100644 --- a/proto/cluster.proto +++ b/proto/cluster.proto @@ -66,7 +66,7 @@ service ClusterService { message CreateClusterRequest { // Required. The cluster to be created. Cluster cluster = 1 [(google.api.field_behavior) = REQUIRED]; - // Required. The namespace of the cluster to be created. + // Required. The namespace of the cluster to be created. string namespace = 2 [(google.api.field_behavior) = REQUIRED]; } @@ -134,7 +134,7 @@ message EnvValueFrom { Source source = 1; // Name for config map or secret, container name for resource, path for field string name = 2; - // Key for config map or secret, resource name for resource + // Key for config map or secret, resource name for resource string key = 3; } @@ -146,18 +146,18 @@ message EnvironmentVariables { message AutoscalerOptions { // IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources. - // Defaults to 60 (one minute). - int32 idleTimeoutSeconds = 1; - // UpscalingMode is "Conservative", "Default", or "Aggressive." - // Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster. - // Default: Upscaling is not rate-limited. - // Aggressive: An alias for Default; upscaling is not rate-limited. - // It is not read by the KubeRay operator but by the Ray autoscaler. - string upscalingMode = 2; + // Defaults to 60 (one minute). + int32 idleTimeoutSeconds = 1; + // UpscalingMode is "Conservative", "Default", or "Aggressive." + // Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster. + // Default: Upscaling is not rate-limited. 
+ // Aggressive: An alias for Default; upscaling is not rate-limited. + // It is not read by the KubeRay operator but by the Ray autoscaler. + string upscalingMode = 2; // Image optionally overrides the autoscaler's container image. This override is for provided for autoscaler testing and development. - string image = 3; - // ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is for provided for autoscaler testing and development. - string imagePullPolicy = 4; + string image = 3; + // ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is for provided for autoscaler testing and development. + string imagePullPolicy = 4; // Optional CPUs requirements for autoscaler - default "500m" string cpu = 5; // Optional memory requirements for autoscaler - default "512Mi" @@ -189,11 +189,11 @@ message Cluster { PRODUCTION = 3; } Environment environment = 5; - + // Required field. This field indicates ray cluster configuration ClusterSpec cluster_spec = 6 [(google.api.field_behavior) = REQUIRED]; - - // Optional. Annotations, for example, "kubernetes.io/ingress.class" to define Ingress class + + // Optional. Annotations, for example, "kubernetes.io/ingress.class" to define Ingress class map annotations = 7; // Optional input field. Container environment variables from user. @@ -210,7 +210,7 @@ message Cluster { // Output. The list related to the cluster. repeated ClusterEvent events = 12 [(google.api.field_behavior) = OUTPUT_ONLY]; - + // Output. The service endpoint of the cluster map service_endpoint = 13 [(google.api.field_behavior) = OUTPUT_ONLY]; } @@ -222,9 +222,9 @@ message ClusterSpec { // Optional. 
The worker group configurations repeated WorkerGroupSpec worker_group_spec = 2; // EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs - bool enableInTreeAutoscaling = 3; - // AutoscalerOptions specifies optional configuration for the Ray autoscaler. - AutoscalerOptions autoscalerOptions = 4; + bool enableInTreeAutoscaling = 3; + // AutoscalerOptions specifies optional configuration for the Ray autoscaler. + AutoscalerOptions autoscalerOptions = 4; } @@ -242,8 +242,8 @@ message Volume { string name = 3; // volume name string source = 4; // volume source, for example hostpath source, secret or configMap name, etc bool read_only = 5; // Read only flag - - // If indicate hostpath, we need to let user indicate which type + + // If indicate hostpath, we need to let user indicate which type // they would like to use. enum HostPathType { DIRECTORY = 0; @@ -256,19 +256,38 @@ message Volume { HOSTTOCONTAINER = 1; BIDIRECTIONAL = 2; } - MountPropagationMode mount_propagation_mode = 7; + MountPropagationMode mount_propagation_mode = 7; // If indicate ephemeral, we need to let user specify volumeClaimTemplate string storageClassName = 8; // If not defined, default is used enum AccessMode { RWO = 0; // ReadWriteOnce ROX = 1; // ReadOnlyMany - RWX = 2; // ReadWriteMany + RWX = 2; // ReadWriteMany } AccessMode accessMode = 9; string storage = 10; // For ephemeral - required storage, GB, for empty dir - MB map items = 11; // Items used for configMap and secrets } +// Adds and removes POSIX capabilities from running containers. +message Capabilities { + // Optional. Added capabilities + repeated string add = 1; + + // Optional. Removed capabilities + repeated string drop = 2; +} + +// SecurityContext holds security configuration that will be applied to a container. +// Some fields are present in both SecurityContext and PodSecurityContext. When both +// are set, the values in SecurityContext take precedence. 
+message SecurityContext { + // Optional. The capabilities to add/drop when running containers. + Capabilities capabilities = 1; + // Optional. Run container in privileged mode - essentially equivalent to root on the host. Default is false. + optional bool privileged = 2; +} + // Cluster HeadGroup specification message HeadGroupSpec { // Required. The computeTemplate of head node group @@ -280,21 +299,25 @@ message HeadGroupSpec { // Optional. Enable Ingress // if Ingress is enabled, we might have to specify annotation IngressClassAnnotationKey, for the cluster itself, defining Ingress class bool enableIngress = 4; - // Required. The ray start params of head node group. + // Required. The ray start params of head node group. map ray_start_params = 5 [(google.api.field_behavior) = REQUIRED]; // Optional. The volumes mount to head pod repeated Volume volumes = 6; // Optional. ServiceAccount used by head pod // Note that the service account has to be created prior to usage here - string service_account = 7; + string service_account = 7; // Optional. image pull secret used by head pod - string image_pull_secret = 8; + string image_pull_secret = 8; // Optional. Environment variables for head pod EnvironmentVariables environment = 9; // Optional. Annotations for the head pod map annotations = 10; // Optional. Labels for the head pod map labels = 11; + // Optional. Image pull policy. We only support Always and IfNotPresent. + string imagePullPolicy = 12; + // Optional. Configure the security context for the head container for debugging etc. + SecurityContext security_context = 13; } message WorkerGroupSpec { @@ -304,9 +327,9 @@ message WorkerGroupSpec { string compute_template = 2 [(google.api.field_behavior) = REQUIRED]; // Optional field. This field will be used to retrieve right ray container string image = 3; - // Required. Desired replicas of the worker group + // Required.
Desired replicas of the worker group int32 replicas = 4 [(google.api.field_behavior) = REQUIRED]; - // Optional. Min replicas of the worker group, can't be greater than max_replicas. + // Optional. Min replicas of the worker group, can't be greater than max_replicas. int32 min_replicas = 5; // Required. Max replicas of the worker group (>0) int32 max_replicas = 6 [(google.api.field_behavior) = REQUIRED]; @@ -316,15 +339,19 @@ message WorkerGroupSpec { repeated Volume volumes = 8; // Optional. ServiceAccount used by worker pod // Note that the service account has to be created prior to usage here - string service_account = 9; + string service_account = 9; // Optional. image pull secret used by worker pod - string image_pull_secret = 10; + string image_pull_secret = 10; // Optional. Environment variables for worker pod EnvironmentVariables environment = 11; // Optional. Annotations for the worker pod map annotations = 12; // Optional. Labels for the worker pod map labels = 13; + // Optional. Image pull policy. We only support Always and IfNotPresent. + string imagePullPolicy = 14; + // Optional. Configure the security context for the worker container for debugging etc. + SecurityContext security_context = 15; } message ClusterEvent { @@ -334,7 +361,7 @@ message ClusterEvent { // Human readable name for event. string name = 2; - // Event creation time. + // Event creation time. google.protobuf.Timestamp created_at = 3; // The first time the event occur. @@ -351,7 +378,7 @@ message ClusterEvent { // Type of this event (Normal, Warning), new types could be added in the future string type = 8; - + // The number of times this event has occurred.
int32 count = 9; } diff --git a/proto/config.proto b/proto/config.proto index 63e4212769b..314f6ddeea3 100644 --- a/proto/config.proto +++ b/proto/config.proto @@ -53,7 +53,7 @@ service ComputeTemplateService { }; } - // Deletes a compute template by its name and namespace + // Deletes a compute template by its name and namespace rpc DeleteComputeTemplate(DeleteComputeTemplateRequest) returns (google.protobuf.Empty) { option (google.api.http) = { delete: "/apis/v1/namespaces/{namespace}/compute_templates/{name}" @@ -76,7 +76,7 @@ message GetComputeTemplateRequest { } message ListComputeTemplatesRequest { - // Required. The namespace of the compute templates to be retrieved. + // Required. The namespace of the compute templates to be retrieved. string namespace = 1 [(google.api.field_behavior) = REQUIRED]; // TODO: support paganation later } @@ -128,6 +128,8 @@ message ComputeTemplate { string gpu_accelerator = 6; // Optional pod tolerations repeated PodToleration tolerations = 7; + // Optional. Name and number of the extended resources + map extended_resources = 8; } // This service is not implemented. 
diff --git a/proto/error.proto b/proto/error.proto index 732cd869b4c..f19e1c40100 100644 --- a/proto/error.proto +++ b/proto/error.proto @@ -12,4 +12,3 @@ message Status { int32 code = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; repeated google.protobuf.Any details = 3 [(google.api.field_behavior) = OUTPUT_ONLY]; } - diff --git a/proto/go.mod b/proto/go.mod index 1efeeed141c..7fb7b65b06f 100644 --- a/proto/go.mod +++ b/proto/go.mod @@ -3,15 +3,15 @@ module github.com/ray-project/kuberay/proto go 1.20 require ( - github.com/grpc-ecosystem/grpc-gateway/v2 v2.6.0 - google.golang.org/genproto v0.0.0-20210909211513-a8c4777a87af - google.golang.org/grpc v1.40.0 - google.golang.org/protobuf v1.32.0 + github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 + google.golang.org/genproto/googleapis/api v0.0.0-20240624140628-dc46fd24d27d + google.golang.org/grpc v1.64.0 + google.golang.org/protobuf v1.34.2 ) require ( - github.com/golang/protobuf v1.5.2 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240624140628-dc46fd24d27d // indirect ) diff --git a/proto/go.sum b/proto/go.sum index f92de083029..a396e0e651c 100644 --- a/proto/go.sum +++ b/proto/go.sum @@ -1,412 +1,17 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= -cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= -cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= -cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= -cloud.google.com/go v0.46.3/go.mod 
h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= -cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= -cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= -cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= -cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= -cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= -cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= -cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= -cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= -cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= -cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= -cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= -cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= -cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= -cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= -cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= -cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= -cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= -cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= 
-cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= -cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= -cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= -dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane 
v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ= -github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= -github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= -github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= 
-github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 
-github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= -github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= -github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/renameio 
v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= -github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.6.0 h1:rgxjzoDmDXw5q8HONgyHhBas4to0/XWRo/gPpJhsUNQ= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.6.0/go.mod h1:qrJPVzv9YlhsrxJc3P/Q85nr0w1lIRikTl4JlhdDH5w= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= -github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod 
h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= -go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= -go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp 
v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= -golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= -golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= -golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= -golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= -golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod 
h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= -golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 
-golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net 
v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys 
v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= -golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod 
h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= -google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= -google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.17.0/go.mod 
h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= -google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 
-google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= -google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= -google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod 
h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= -google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210903162649-d08c68adba83/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210909211513-a8c4777a87af h1:aLMMXFYqw01RA6XJim5uaN+afqNNjc9P8HPAbnpnc5s= -google.golang.org/genproto v0.0.0-20210909211513-a8c4777a87af/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.28.0/go.mod 
h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= -google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= -google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.40.0 h1:AGJ0Ih4mHjSeibYkFGh1dD9KJ/eOtZ93I6hoHhukQ5Q= -google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 
-google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= -rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod 
h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +google.golang.org/genproto/googleapis/api v0.0.0-20240624140628-dc46fd24d27d h1:Aqf0fiIdUQEj0Gn9mKFFXoQfTTEaNopWpfVyYADxiSg= +google.golang.org/genproto/googleapis/api v0.0.0-20240624140628-dc46fd24d27d/go.mod h1:Od4k8V1LQSizPRUK4OzZ7TBE/20k+jPczUDAEyvn69Y= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240624140628-dc46fd24d27d h1:k3zyW3BYYR30e8v3x0bTDdE9vpYFjZHK+HcyqkrppWk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240624140628-dc46fd24d27d/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= diff --git a/proto/go_client/cluster.pb.go b/proto/go_client/cluster.pb.go index c7432124197..acce2b2f7c0 100644 --- a/proto/go_client/cluster.pb.go +++ b/proto/go_client/cluster.pb.go @@ -1283,6 +1283,124 @@ func (x *Volume) GetItems() map[string]string { return nil } +// Adds and removes POSIX capabilities from running containers. +type Capabilities struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Optional. Added capabilities + Add []string `protobuf:"bytes,1,rep,name=add,proto3" json:"add,omitempty"` + // Optional. 
Removed capabilities + Drop []string `protobuf:"bytes,2,rep,name=drop,proto3" json:"drop,omitempty"` +} + +func (x *Capabilities) Reset() { + *x = Capabilities{} + if protoimpl.UnsafeEnabled { + mi := &file_cluster_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Capabilities) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Capabilities) ProtoMessage() {} + +func (x *Capabilities) ProtoReflect() protoreflect.Message { + mi := &file_cluster_proto_msgTypes[13] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Capabilities.ProtoReflect.Descriptor instead. +func (*Capabilities) Descriptor() ([]byte, []int) { + return file_cluster_proto_rawDescGZIP(), []int{13} +} + +func (x *Capabilities) GetAdd() []string { + if x != nil { + return x.Add + } + return nil +} + +func (x *Capabilities) GetDrop() []string { + if x != nil { + return x.Drop + } + return nil +} + +// SecurityContext holds security configuration that will be applied to a container. +// Some fields are present in both SecurityContext and PodSecurityContext. When both +// are set, the values in SecurityContext take precedence. +type SecurityContext struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Optional. The capabilities to add/drop when running containers. + Capabilities *Capabilities `protobuf:"bytes,1,opt,name=capabilities,proto3" json:"capabilities,omitempty"` + // Optional. Run container in privileged mode - essentially equivalent to root on the host. Default is false. 
+ Privileged *bool `protobuf:"varint,2,opt,name=privileged,proto3,oneof" json:"privileged,omitempty"` +} + +func (x *SecurityContext) Reset() { + *x = SecurityContext{} + if protoimpl.UnsafeEnabled { + mi := &file_cluster_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SecurityContext) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SecurityContext) ProtoMessage() {} + +func (x *SecurityContext) ProtoReflect() protoreflect.Message { + mi := &file_cluster_proto_msgTypes[14] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SecurityContext.ProtoReflect.Descriptor instead. +func (*SecurityContext) Descriptor() ([]byte, []int) { + return file_cluster_proto_rawDescGZIP(), []int{14} +} + +func (x *SecurityContext) GetCapabilities() *Capabilities { + if x != nil { + return x.Capabilities + } + return nil +} + +func (x *SecurityContext) GetPrivileged() bool { + if x != nil && x.Privileged != nil { + return *x.Privileged + } + return false +} + // Cluster HeadGroup specification type HeadGroupSpec struct { state protoimpl.MessageState @@ -1313,12 +1431,16 @@ type HeadGroupSpec struct { Annotations map[string]string `protobuf:"bytes,10,rep,name=annotations,proto3" json:"annotations,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` // Optional. Labels for the head pod Labels map[string]string `protobuf:"bytes,11,rep,name=labels,proto3" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // Optional image pull policy We only support Always and ifNotPresent + ImagePullPolicy string `protobuf:"bytes,12,opt,name=imagePullPolicy,proto3" json:"imagePullPolicy,omitempty"` + // Optional. 
Configure the security context for the head container for debugging etc. + SecurityContext *SecurityContext `protobuf:"bytes,13,opt,name=security_context,json=securityContext,proto3" json:"security_context,omitempty"` } func (x *HeadGroupSpec) Reset() { *x = HeadGroupSpec{} if protoimpl.UnsafeEnabled { - mi := &file_cluster_proto_msgTypes[13] + mi := &file_cluster_proto_msgTypes[15] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1331,7 +1453,7 @@ func (x *HeadGroupSpec) String() string { func (*HeadGroupSpec) ProtoMessage() {} func (x *HeadGroupSpec) ProtoReflect() protoreflect.Message { - mi := &file_cluster_proto_msgTypes[13] + mi := &file_cluster_proto_msgTypes[15] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1344,7 +1466,7 @@ func (x *HeadGroupSpec) ProtoReflect() protoreflect.Message { // Deprecated: Use HeadGroupSpec.ProtoReflect.Descriptor instead. func (*HeadGroupSpec) Descriptor() ([]byte, []int) { - return file_cluster_proto_rawDescGZIP(), []int{13} + return file_cluster_proto_rawDescGZIP(), []int{15} } func (x *HeadGroupSpec) GetComputeTemplate() string { @@ -1424,6 +1546,20 @@ func (x *HeadGroupSpec) GetLabels() map[string]string { return nil } +func (x *HeadGroupSpec) GetImagePullPolicy() string { + if x != nil { + return x.ImagePullPolicy + } + return "" +} + +func (x *HeadGroupSpec) GetSecurityContext() *SecurityContext { + if x != nil { + return x.SecurityContext + } + return nil +} + type WorkerGroupSpec struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1456,12 +1592,16 @@ type WorkerGroupSpec struct { Annotations map[string]string `protobuf:"bytes,12,rep,name=annotations,proto3" json:"annotations,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` // Optional. 
Labels for the worker pod Labels map[string]string `protobuf:"bytes,13,rep,name=labels,proto3" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // Optional image pull policy We only support Always and ifNotPresent + ImagePullPolicy string `protobuf:"bytes,14,opt,name=imagePullPolicy,proto3" json:"imagePullPolicy,omitempty"` + // Optional. Configure the security context for the worker container for debugging etc. + SecurityContext *SecurityContext `protobuf:"bytes,15,opt,name=security_context,json=securityContext,proto3" json:"security_context,omitempty"` } func (x *WorkerGroupSpec) Reset() { *x = WorkerGroupSpec{} if protoimpl.UnsafeEnabled { - mi := &file_cluster_proto_msgTypes[14] + mi := &file_cluster_proto_msgTypes[16] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1474,7 +1614,7 @@ func (x *WorkerGroupSpec) String() string { func (*WorkerGroupSpec) ProtoMessage() {} func (x *WorkerGroupSpec) ProtoReflect() protoreflect.Message { - mi := &file_cluster_proto_msgTypes[14] + mi := &file_cluster_proto_msgTypes[16] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1487,7 +1627,7 @@ func (x *WorkerGroupSpec) ProtoReflect() protoreflect.Message { // Deprecated: Use WorkerGroupSpec.ProtoReflect.Descriptor instead. 
func (*WorkerGroupSpec) Descriptor() ([]byte, []int) { - return file_cluster_proto_rawDescGZIP(), []int{14} + return file_cluster_proto_rawDescGZIP(), []int{16} } func (x *WorkerGroupSpec) GetGroupName() string { @@ -1581,6 +1721,20 @@ func (x *WorkerGroupSpec) GetLabels() map[string]string { return nil } +func (x *WorkerGroupSpec) GetImagePullPolicy() string { + if x != nil { + return x.ImagePullPolicy + } + return "" +} + +func (x *WorkerGroupSpec) GetSecurityContext() *SecurityContext { + if x != nil { + return x.SecurityContext + } + return nil +} + type ClusterEvent struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1609,7 +1763,7 @@ type ClusterEvent struct { func (x *ClusterEvent) Reset() { *x = ClusterEvent{} if protoimpl.UnsafeEnabled { - mi := &file_cluster_proto_msgTypes[15] + mi := &file_cluster_proto_msgTypes[17] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1622,7 +1776,7 @@ func (x *ClusterEvent) String() string { func (*ClusterEvent) ProtoMessage() {} func (x *ClusterEvent) ProtoReflect() protoreflect.Message { - mi := &file_cluster_proto_msgTypes[15] + mi := &file_cluster_proto_msgTypes[17] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1635,7 +1789,7 @@ func (x *ClusterEvent) ProtoReflect() protoreflect.Message { // Deprecated: Use ClusterEvent.ProtoReflect.Descriptor instead. 
func (*ClusterEvent) Descriptor() ([]byte, []int) { - return file_cluster_proto_rawDescGZIP(), []int{15} + return file_cluster_proto_rawDescGZIP(), []int{17} } func (x *ClusterEvent) GetId() string { @@ -1920,95 +2074,120 @@ var file_cluster_proto_rawDesc = []byte{ 0x11, 0x0a, 0x0d, 0x42, 0x49, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x41, 0x4c, 0x10, 0x02, 0x22, 0x27, 0x0a, 0x0a, 0x41, 0x63, 0x63, 0x65, 0x73, 0x73, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x52, 0x57, 0x4f, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x52, 0x4f, 0x58, - 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x52, 0x57, 0x58, 0x10, 0x02, 0x22, 0xf5, 0x05, 0x0a, 0x0d, - 0x48, 0x65, 0x61, 0x64, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, 0x63, 0x12, 0x2e, 0x0a, - 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, - 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x0f, 0x63, 0x6f, - 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x14, 0x0a, - 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x69, 0x6d, - 0x61, 0x67, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x73, 0x65, 0x72, 0x76, 0x69, - 0x63, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, - 0x49, 0x6e, 0x67, 0x72, 0x65, 0x73, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x65, - 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x49, 0x6e, 0x67, 0x72, 0x65, 0x73, 0x73, 0x12, 0x57, 0x0a, 0x10, - 0x72, 0x61, 0x79, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, - 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x48, - 0x65, 0x61, 0x64, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, 0x63, 0x2e, 0x52, 0x61, 0x79, - 0x53, 0x74, 0x61, 0x72, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x45, 
0x6e, 0x74, 0x72, 0x79, - 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x0e, 0x72, 0x61, 0x79, 0x53, 0x74, 0x61, 0x72, 0x74, 0x50, - 0x61, 0x72, 0x61, 0x6d, 0x73, 0x12, 0x27, 0x0a, 0x07, 0x76, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x73, - 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0d, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x56, - 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x52, 0x07, 0x76, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x73, 0x12, 0x27, - 0x0a, 0x0f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x5f, 0x61, 0x63, 0x63, 0x6f, 0x75, 0x6e, - 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, - 0x41, 0x63, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x2a, 0x0a, 0x11, 0x69, 0x6d, 0x61, 0x67, 0x65, - 0x5f, 0x70, 0x75, 0x6c, 0x6c, 0x5f, 0x73, 0x65, 0x63, 0x72, 0x65, 0x74, 0x18, 0x08, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x50, 0x75, 0x6c, 0x6c, 0x53, 0x65, 0x63, - 0x72, 0x65, 0x74, 0x12, 0x3d, 0x0a, 0x0b, 0x65, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, - 0x6e, 0x74, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x56, 0x61, 0x72, 0x69, - 0x61, 0x62, 0x6c, 0x65, 0x73, 0x52, 0x0b, 0x65, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, - 0x6e, 0x74, 0x12, 0x47, 0x0a, 0x0b, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x18, 0x0a, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x48, 0x65, 0x61, 0x64, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, 0x63, 0x2e, 0x41, 0x6e, - 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0b, - 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x38, 0x0a, 0x06, 0x6c, - 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x0b, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x2e, 0x48, 0x65, 0x61, 0x64, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, - 0x63, 0x2e, 
0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x6c, - 0x61, 0x62, 0x65, 0x6c, 0x73, 0x1a, 0x41, 0x0a, 0x13, 0x52, 0x61, 0x79, 0x53, 0x74, 0x61, 0x72, - 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, - 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, - 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, - 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x3e, 0x0a, 0x10, 0x41, 0x6e, 0x6e, 0x6f, - 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, - 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, - 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, - 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x39, 0x0a, 0x0b, 0x4c, 0x61, 0x62, 0x65, - 0x6c, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, - 0x02, 0x38, 0x01, 0x22, 0xc4, 0x06, 0x0a, 0x0f, 0x57, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x47, 0x72, - 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, 0x63, 0x12, 0x22, 0x0a, 0x0a, 0x67, 0x72, 0x6f, 0x75, 0x70, - 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02, - 0x52, 0x09, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2e, 0x0a, 0x10, 0x63, - 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x0f, 0x63, 0x6f, 0x6d, 0x70, - 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x69, - 0x6d, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 
0x28, 0x09, 0x52, 0x05, 0x69, 0x6d, 0x61, 0x67, - 0x65, 0x12, 0x1f, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x05, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, - 0x61, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, - 0x61, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x6d, 0x69, 0x6e, 0x52, 0x65, 0x70, - 0x6c, 0x69, 0x63, 0x61, 0x73, 0x12, 0x26, 0x0a, 0x0c, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x65, 0x70, - 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x42, 0x03, 0xe0, 0x41, 0x02, - 0x52, 0x0b, 0x6d, 0x61, 0x78, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x12, 0x59, 0x0a, - 0x10, 0x72, 0x61, 0x79, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, - 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x57, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, 0x63, 0x2e, - 0x52, 0x61, 0x79, 0x53, 0x74, 0x61, 0x72, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x45, 0x6e, - 0x74, 0x72, 0x79, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x0e, 0x72, 0x61, 0x79, 0x53, 0x74, 0x61, - 0x72, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x12, 0x27, 0x0a, 0x07, 0x76, 0x6f, 0x6c, 0x75, - 0x6d, 0x65, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0d, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2e, 0x56, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x52, 0x07, 0x76, 0x6f, 0x6c, 0x75, 0x6d, 0x65, - 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x5f, 0x61, 0x63, 0x63, - 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x73, 0x65, 0x72, 0x76, - 0x69, 0x63, 0x65, 0x41, 0x63, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x2a, 0x0a, 0x11, 0x69, 0x6d, - 0x61, 0x67, 0x65, 0x5f, 0x70, 0x75, 0x6c, 0x6c, 0x5f, 0x73, 0x65, 0x63, 0x72, 0x65, 0x74, 0x18, - 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x50, 0x75, 
0x6c, 0x6c, - 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x12, 0x3d, 0x0a, 0x0b, 0x65, 0x6e, 0x76, 0x69, 0x72, 0x6f, - 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x56, - 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x52, 0x0b, 0x65, 0x6e, 0x76, 0x69, 0x72, 0x6f, - 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x49, 0x0a, 0x0b, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x0c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x70, 0x72, 0x6f, + 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x52, 0x57, 0x58, 0x10, 0x02, 0x22, 0x34, 0x0a, 0x0c, 0x43, + 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x61, + 0x64, 0x64, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x03, 0x61, 0x64, 0x64, 0x12, 0x12, 0x0a, + 0x04, 0x64, 0x72, 0x6f, 0x70, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x64, 0x72, 0x6f, + 0x70, 0x22, 0x7e, 0x0a, 0x0f, 0x53, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x12, 0x37, 0x0a, 0x0c, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, + 0x74, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x43, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x52, + 0x0c, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x12, 0x23, 0x0a, + 0x0a, 0x70, 0x72, 0x69, 0x76, 0x69, 0x6c, 0x65, 0x67, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x08, 0x48, 0x00, 0x52, 0x0a, 0x70, 0x72, 0x69, 0x76, 0x69, 0x6c, 0x65, 0x67, 0x65, 0x64, 0x88, + 0x01, 0x01, 0x42, 0x0d, 0x0a, 0x0b, 0x5f, 0x70, 0x72, 0x69, 0x76, 0x69, 0x6c, 0x65, 0x67, 0x65, + 0x64, 0x22, 0xe2, 0x06, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x64, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, + 0x70, 0x65, 0x63, 0x12, 0x2e, 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, + 0x65, 0x6d, 0x70, 0x6c, 
0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, + 0x41, 0x02, 0x52, 0x0f, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, + 0x61, 0x74, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x65, 0x72, + 0x76, 0x69, 0x63, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0d, + 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x49, 0x6e, 0x67, 0x72, 0x65, 0x73, 0x73, 0x18, 0x04, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x49, 0x6e, 0x67, 0x72, 0x65, + 0x73, 0x73, 0x12, 0x57, 0x0a, 0x10, 0x72, 0x61, 0x79, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, + 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x48, 0x65, 0x61, 0x64, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, + 0x65, 0x63, 0x2e, 0x52, 0x61, 0x79, 0x53, 0x74, 0x61, 0x72, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x0e, 0x72, 0x61, 0x79, + 0x53, 0x74, 0x61, 0x72, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x12, 0x27, 0x0a, 0x07, 0x76, + 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0d, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x56, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x52, 0x07, 0x76, 0x6f, 0x6c, + 0x75, 0x6d, 0x65, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x5f, + 0x61, 0x63, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x73, + 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x41, 0x63, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x2a, 0x0a, + 0x11, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x70, 0x75, 0x6c, 0x6c, 0x5f, 0x73, 0x65, 0x63, 0x72, + 0x65, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 
0x69, 0x6d, 0x61, 0x67, 0x65, 0x50, + 0x75, 0x6c, 0x6c, 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x12, 0x3d, 0x0a, 0x0b, 0x65, 0x6e, 0x76, + 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, + 0x6e, 0x74, 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x52, 0x0b, 0x65, 0x6e, 0x76, + 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x47, 0x0a, 0x0b, 0x61, 0x6e, 0x6e, 0x6f, + 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x0a, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x48, 0x65, 0x61, 0x64, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, + 0x70, 0x65, 0x63, 0x2e, 0x41, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0b, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x12, 0x38, 0x0a, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x0b, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x20, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x48, 0x65, 0x61, 0x64, 0x47, 0x72, + 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, 0x63, 0x2e, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x12, 0x28, 0x0a, 0x0f, 0x69, + 0x6d, 0x61, 0x67, 0x65, 0x50, 0x75, 0x6c, 0x6c, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x18, 0x0c, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x50, 0x75, 0x6c, 0x6c, 0x50, + 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x12, 0x41, 0x0a, 0x10, 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, + 0x79, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x16, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, + 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x0f, 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, + 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x1a, 0x41, 0x0a, 0x13, 0x52, 0x61, 0x79, 0x53, + 
0x74, 0x61, 0x72, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x3e, 0x0a, 0x10, 0x41, + 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x39, 0x0a, 0x0b, 0x4c, + 0x61, 0x62, 0x65, 0x6c, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, + 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xb1, 0x07, 0x0a, 0x0f, 0x57, 0x6f, 0x72, 0x6b, 0x65, + 0x72, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, 0x63, 0x12, 0x22, 0x0a, 0x0a, 0x67, 0x72, + 0x6f, 0x75, 0x70, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, + 0xe0, 0x41, 0x02, 0x52, 0x09, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2e, + 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, + 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x0f, 0x63, + 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x14, + 0x0a, 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x69, + 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1f, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 
0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x08, 0x72, 0x65, 0x70, + 0x6c, 0x69, 0x63, 0x61, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x65, 0x70, + 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x6d, 0x69, 0x6e, + 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x12, 0x26, 0x0a, 0x0c, 0x6d, 0x61, 0x78, 0x5f, + 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x42, 0x03, + 0xe0, 0x41, 0x02, 0x52, 0x0b, 0x6d, 0x61, 0x78, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x73, + 0x12, 0x59, 0x0a, 0x10, 0x72, 0x61, 0x79, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x70, 0x61, + 0x72, 0x61, 0x6d, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, - 0x65, 0x63, 0x2e, 0x41, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, - 0x74, 0x72, 0x79, 0x52, 0x0b, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x12, 0x3a, 0x0a, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x22, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x47, - 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, 0x63, 0x2e, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x45, - 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x1a, 0x41, 0x0a, 0x13, + 0x65, 0x63, 0x2e, 0x52, 0x61, 0x79, 0x53, 0x74, 0x61, 0x72, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x0e, 0x72, 0x61, 0x79, + 0x53, 0x74, 0x61, 0x72, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x12, 0x27, 0x0a, 0x07, 0x76, + 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0d, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x56, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x52, 0x07, 0x76, 0x6f, 0x6c, + 0x75, 0x6d, 0x65, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x73, 0x65, 0x72, 0x76, 0x69, 
0x63, 0x65, 0x5f, + 0x61, 0x63, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x73, + 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x41, 0x63, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x2a, 0x0a, + 0x11, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x70, 0x75, 0x6c, 0x6c, 0x5f, 0x73, 0x65, 0x63, 0x72, + 0x65, 0x74, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x50, + 0x75, 0x6c, 0x6c, 0x53, 0x65, 0x63, 0x72, 0x65, 0x74, 0x12, 0x3d, 0x0a, 0x0b, 0x65, 0x6e, 0x76, + 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, + 0x6e, 0x74, 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x52, 0x0b, 0x65, 0x6e, 0x76, + 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x49, 0x0a, 0x0b, 0x61, 0x6e, 0x6e, 0x6f, + 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x0c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x47, 0x72, 0x6f, 0x75, + 0x70, 0x53, 0x70, 0x65, 0x63, 0x2e, 0x41, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0b, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x12, 0x3a, 0x0a, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x0d, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x57, 0x6f, 0x72, 0x6b, + 0x65, 0x72, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x70, 0x65, 0x63, 0x2e, 0x4c, 0x61, 0x62, 0x65, + 0x6c, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x12, + 0x28, 0x0a, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x50, 0x75, 0x6c, 0x6c, 0x50, 0x6f, 0x6c, 0x69, + 0x63, 0x79, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x50, + 0x75, 0x6c, 0x6c, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x12, 0x41, 0x0a, 0x10, 0x73, 0x65, 0x63, + 0x75, 0x72, 0x69, 
0x74, 0x79, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x18, 0x0f, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x65, 0x63, 0x75, + 0x72, 0x69, 0x74, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x0f, 0x73, 0x65, 0x63, + 0x75, 0x72, 0x69, 0x74, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x1a, 0x41, 0x0a, 0x13, 0x52, 0x61, 0x79, 0x53, 0x74, 0x61, 0x72, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, @@ -2101,7 +2280,7 @@ func file_cluster_proto_rawDescGZIP() []byte { } var file_cluster_proto_enumTypes = make([]protoimpl.EnumInfo, 6) -var file_cluster_proto_msgTypes = make([]protoimpl.MessageInfo, 27) +var file_cluster_proto_msgTypes = make([]protoimpl.MessageInfo, 29) var file_cluster_proto_goTypes = []interface{}{ (EnvValueFrom_Source)(0), // 0: proto.EnvValueFrom.Source (Cluster_Environment)(0), // 1: proto.Cluster.Environment @@ -2122,77 +2301,82 @@ var file_cluster_proto_goTypes = []interface{}{ (*Cluster)(nil), // 16: proto.Cluster (*ClusterSpec)(nil), // 17: proto.ClusterSpec (*Volume)(nil), // 18: proto.Volume - (*HeadGroupSpec)(nil), // 19: proto.HeadGroupSpec - (*WorkerGroupSpec)(nil), // 20: proto.WorkerGroupSpec - (*ClusterEvent)(nil), // 21: proto.ClusterEvent - nil, // 22: proto.EnvironmentVariables.ValuesEntry - nil, // 23: proto.EnvironmentVariables.ValuesFromEntry - nil, // 24: proto.Cluster.AnnotationsEntry - nil, // 25: proto.Cluster.ServiceEndpointEntry - nil, // 26: proto.Volume.ItemsEntry - nil, // 27: proto.HeadGroupSpec.RayStartParamsEntry - nil, // 28: proto.HeadGroupSpec.AnnotationsEntry - nil, // 29: proto.HeadGroupSpec.LabelsEntry - nil, // 30: proto.WorkerGroupSpec.RayStartParamsEntry - nil, // 31: proto.WorkerGroupSpec.AnnotationsEntry - nil, // 32: proto.WorkerGroupSpec.LabelsEntry - 
(*timestamppb.Timestamp)(nil), // 33: google.protobuf.Timestamp - (*emptypb.Empty)(nil), // 34: google.protobuf.Empty + (*Capabilities)(nil), // 19: proto.Capabilities + (*SecurityContext)(nil), // 20: proto.SecurityContext + (*HeadGroupSpec)(nil), // 21: proto.HeadGroupSpec + (*WorkerGroupSpec)(nil), // 22: proto.WorkerGroupSpec + (*ClusterEvent)(nil), // 23: proto.ClusterEvent + nil, // 24: proto.EnvironmentVariables.ValuesEntry + nil, // 25: proto.EnvironmentVariables.ValuesFromEntry + nil, // 26: proto.Cluster.AnnotationsEntry + nil, // 27: proto.Cluster.ServiceEndpointEntry + nil, // 28: proto.Volume.ItemsEntry + nil, // 29: proto.HeadGroupSpec.RayStartParamsEntry + nil, // 30: proto.HeadGroupSpec.AnnotationsEntry + nil, // 31: proto.HeadGroupSpec.LabelsEntry + nil, // 32: proto.WorkerGroupSpec.RayStartParamsEntry + nil, // 33: proto.WorkerGroupSpec.AnnotationsEntry + nil, // 34: proto.WorkerGroupSpec.LabelsEntry + (*timestamppb.Timestamp)(nil), // 35: google.protobuf.Timestamp + (*emptypb.Empty)(nil), // 36: google.protobuf.Empty } var file_cluster_proto_depIdxs = []int32{ 16, // 0: proto.CreateClusterRequest.cluster:type_name -> proto.Cluster 16, // 1: proto.ListClustersResponse.clusters:type_name -> proto.Cluster 16, // 2: proto.ListAllClustersResponse.clusters:type_name -> proto.Cluster 0, // 3: proto.EnvValueFrom.source:type_name -> proto.EnvValueFrom.Source - 22, // 4: proto.EnvironmentVariables.values:type_name -> proto.EnvironmentVariables.ValuesEntry - 23, // 5: proto.EnvironmentVariables.valuesFrom:type_name -> proto.EnvironmentVariables.ValuesFromEntry + 24, // 4: proto.EnvironmentVariables.values:type_name -> proto.EnvironmentVariables.ValuesEntry + 25, // 5: proto.EnvironmentVariables.valuesFrom:type_name -> proto.EnvironmentVariables.ValuesFromEntry 14, // 6: proto.AutoscalerOptions.envs:type_name -> proto.EnvironmentVariables 18, // 7: proto.AutoscalerOptions.volumes:type_name -> proto.Volume 1, // 8: proto.Cluster.environment:type_name -> 
proto.Cluster.Environment 17, // 9: proto.Cluster.cluster_spec:type_name -> proto.ClusterSpec - 24, // 10: proto.Cluster.annotations:type_name -> proto.Cluster.AnnotationsEntry + 26, // 10: proto.Cluster.annotations:type_name -> proto.Cluster.AnnotationsEntry 14, // 11: proto.Cluster.envs:type_name -> proto.EnvironmentVariables - 33, // 12: proto.Cluster.created_at:type_name -> google.protobuf.Timestamp - 33, // 13: proto.Cluster.deleted_at:type_name -> google.protobuf.Timestamp - 21, // 14: proto.Cluster.events:type_name -> proto.ClusterEvent - 25, // 15: proto.Cluster.service_endpoint:type_name -> proto.Cluster.ServiceEndpointEntry - 19, // 16: proto.ClusterSpec.head_group_spec:type_name -> proto.HeadGroupSpec - 20, // 17: proto.ClusterSpec.worker_group_spec:type_name -> proto.WorkerGroupSpec + 35, // 12: proto.Cluster.created_at:type_name -> google.protobuf.Timestamp + 35, // 13: proto.Cluster.deleted_at:type_name -> google.protobuf.Timestamp + 23, // 14: proto.Cluster.events:type_name -> proto.ClusterEvent + 27, // 15: proto.Cluster.service_endpoint:type_name -> proto.Cluster.ServiceEndpointEntry + 21, // 16: proto.ClusterSpec.head_group_spec:type_name -> proto.HeadGroupSpec + 22, // 17: proto.ClusterSpec.worker_group_spec:type_name -> proto.WorkerGroupSpec 15, // 18: proto.ClusterSpec.autoscalerOptions:type_name -> proto.AutoscalerOptions 2, // 19: proto.Volume.volume_type:type_name -> proto.Volume.VolumeType 3, // 20: proto.Volume.host_path_type:type_name -> proto.Volume.HostPathType 4, // 21: proto.Volume.mount_propagation_mode:type_name -> proto.Volume.MountPropagationMode 5, // 22: proto.Volume.accessMode:type_name -> proto.Volume.AccessMode - 26, // 23: proto.Volume.items:type_name -> proto.Volume.ItemsEntry - 27, // 24: proto.HeadGroupSpec.ray_start_params:type_name -> proto.HeadGroupSpec.RayStartParamsEntry - 18, // 25: proto.HeadGroupSpec.volumes:type_name -> proto.Volume - 14, // 26: proto.HeadGroupSpec.environment:type_name -> 
proto.EnvironmentVariables - 28, // 27: proto.HeadGroupSpec.annotations:type_name -> proto.HeadGroupSpec.AnnotationsEntry - 29, // 28: proto.HeadGroupSpec.labels:type_name -> proto.HeadGroupSpec.LabelsEntry - 30, // 29: proto.WorkerGroupSpec.ray_start_params:type_name -> proto.WorkerGroupSpec.RayStartParamsEntry - 18, // 30: proto.WorkerGroupSpec.volumes:type_name -> proto.Volume - 14, // 31: proto.WorkerGroupSpec.environment:type_name -> proto.EnvironmentVariables - 31, // 32: proto.WorkerGroupSpec.annotations:type_name -> proto.WorkerGroupSpec.AnnotationsEntry - 32, // 33: proto.WorkerGroupSpec.labels:type_name -> proto.WorkerGroupSpec.LabelsEntry - 33, // 34: proto.ClusterEvent.created_at:type_name -> google.protobuf.Timestamp - 33, // 35: proto.ClusterEvent.first_timestamp:type_name -> google.protobuf.Timestamp - 33, // 36: proto.ClusterEvent.last_timestamp:type_name -> google.protobuf.Timestamp - 13, // 37: proto.EnvironmentVariables.ValuesFromEntry.value:type_name -> proto.EnvValueFrom - 6, // 38: proto.ClusterService.CreateCluster:input_type -> proto.CreateClusterRequest - 7, // 39: proto.ClusterService.GetCluster:input_type -> proto.GetClusterRequest - 8, // 40: proto.ClusterService.ListCluster:input_type -> proto.ListClustersRequest - 10, // 41: proto.ClusterService.ListAllClusters:input_type -> proto.ListAllClustersRequest - 12, // 42: proto.ClusterService.DeleteCluster:input_type -> proto.DeleteClusterRequest - 16, // 43: proto.ClusterService.CreateCluster:output_type -> proto.Cluster - 16, // 44: proto.ClusterService.GetCluster:output_type -> proto.Cluster - 9, // 45: proto.ClusterService.ListCluster:output_type -> proto.ListClustersResponse - 11, // 46: proto.ClusterService.ListAllClusters:output_type -> proto.ListAllClustersResponse - 34, // 47: proto.ClusterService.DeleteCluster:output_type -> google.protobuf.Empty - 43, // [43:48] is the sub-list for method output_type - 38, // [38:43] is the sub-list for method input_type - 38, // [38:38] is the 
sub-list for extension type_name - 38, // [38:38] is the sub-list for extension extendee - 0, // [0:38] is the sub-list for field type_name + 28, // 23: proto.Volume.items:type_name -> proto.Volume.ItemsEntry + 19, // 24: proto.SecurityContext.capabilities:type_name -> proto.Capabilities + 29, // 25: proto.HeadGroupSpec.ray_start_params:type_name -> proto.HeadGroupSpec.RayStartParamsEntry + 18, // 26: proto.HeadGroupSpec.volumes:type_name -> proto.Volume + 14, // 27: proto.HeadGroupSpec.environment:type_name -> proto.EnvironmentVariables + 30, // 28: proto.HeadGroupSpec.annotations:type_name -> proto.HeadGroupSpec.AnnotationsEntry + 31, // 29: proto.HeadGroupSpec.labels:type_name -> proto.HeadGroupSpec.LabelsEntry + 20, // 30: proto.HeadGroupSpec.security_context:type_name -> proto.SecurityContext + 32, // 31: proto.WorkerGroupSpec.ray_start_params:type_name -> proto.WorkerGroupSpec.RayStartParamsEntry + 18, // 32: proto.WorkerGroupSpec.volumes:type_name -> proto.Volume + 14, // 33: proto.WorkerGroupSpec.environment:type_name -> proto.EnvironmentVariables + 33, // 34: proto.WorkerGroupSpec.annotations:type_name -> proto.WorkerGroupSpec.AnnotationsEntry + 34, // 35: proto.WorkerGroupSpec.labels:type_name -> proto.WorkerGroupSpec.LabelsEntry + 20, // 36: proto.WorkerGroupSpec.security_context:type_name -> proto.SecurityContext + 35, // 37: proto.ClusterEvent.created_at:type_name -> google.protobuf.Timestamp + 35, // 38: proto.ClusterEvent.first_timestamp:type_name -> google.protobuf.Timestamp + 35, // 39: proto.ClusterEvent.last_timestamp:type_name -> google.protobuf.Timestamp + 13, // 40: proto.EnvironmentVariables.ValuesFromEntry.value:type_name -> proto.EnvValueFrom + 6, // 41: proto.ClusterService.CreateCluster:input_type -> proto.CreateClusterRequest + 7, // 42: proto.ClusterService.GetCluster:input_type -> proto.GetClusterRequest + 8, // 43: proto.ClusterService.ListCluster:input_type -> proto.ListClustersRequest + 10, // 44: 
proto.ClusterService.ListAllClusters:input_type -> proto.ListAllClustersRequest + 12, // 45: proto.ClusterService.DeleteCluster:input_type -> proto.DeleteClusterRequest + 16, // 46: proto.ClusterService.CreateCluster:output_type -> proto.Cluster + 16, // 47: proto.ClusterService.GetCluster:output_type -> proto.Cluster + 9, // 48: proto.ClusterService.ListCluster:output_type -> proto.ListClustersResponse + 11, // 49: proto.ClusterService.ListAllClusters:output_type -> proto.ListAllClustersResponse + 36, // 50: proto.ClusterService.DeleteCluster:output_type -> google.protobuf.Empty + 46, // [46:51] is the sub-list for method output_type + 41, // [41:46] is the sub-list for method input_type + 41, // [41:41] is the sub-list for extension type_name + 41, // [41:41] is the sub-list for extension extendee + 0, // [0:41] is the sub-list for field type_name } func init() { file_cluster_proto_init() } @@ -2358,7 +2542,7 @@ func file_cluster_proto_init() { } } file_cluster_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*HeadGroupSpec); i { + switch v := v.(*Capabilities); i { case 0: return &v.state case 1: @@ -2370,7 +2554,7 @@ func file_cluster_proto_init() { } } file_cluster_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*WorkerGroupSpec); i { + switch v := v.(*SecurityContext); i { case 0: return &v.state case 1: @@ -2382,6 +2566,30 @@ func file_cluster_proto_init() { } } file_cluster_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*HeadGroupSpec); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_cluster_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*WorkerGroupSpec); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + 
file_cluster_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*ClusterEvent); i { case 0: return &v.state @@ -2394,13 +2602,14 @@ func file_cluster_proto_init() { } } } + file_cluster_proto_msgTypes[14].OneofWrappers = []interface{}{} type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_cluster_proto_rawDesc, NumEnums: 6, - NumMessages: 27, + NumMessages: 29, NumExtensions: 0, NumServices: 1, }, diff --git a/proto/go_client/cluster.pb.gw.go b/proto/go_client/cluster.pb.gw.go index 162911c9094..4e33eb4c2c4 100644 --- a/proto/go_client/cluster.pb.gw.go +++ b/proto/go_client/cluster.pb.gw.go @@ -440,7 +440,7 @@ func RegisterClusterServiceHandlerServer(ctx context.Context, mux *runtime.Serve // RegisterClusterServiceHandlerFromEndpoint is same as RegisterClusterServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterClusterServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.Dial(endpoint, opts...) + conn, err := grpc.NewClient(endpoint, opts...) if err != nil { return err } diff --git a/proto/go_client/config.pb.go b/proto/go_client/config.pb.go index 4a1de23e94f..0484e4a7f82 100644 --- a/proto/go_client/config.pb.go +++ b/proto/go_client/config.pb.go @@ -470,6 +470,8 @@ type ComputeTemplate struct { GpuAccelerator string `protobuf:"bytes,6,opt,name=gpu_accelerator,json=gpuAccelerator,proto3" json:"gpu_accelerator,omitempty"` // Optional pod tolerations Tolerations []*PodToleration `protobuf:"bytes,7,rep,name=tolerations,proto3" json:"tolerations,omitempty"` + // Optional. 
Name and number of the extended resources + ExtendedResources map[string]uint32 `protobuf:"bytes,8,rep,name=extended_resources,json=extendedResources,proto3" json:"extended_resources,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3"` } func (x *ComputeTemplate) Reset() { @@ -553,6 +555,13 @@ func (x *ComputeTemplate) GetTolerations() []*PodToleration { return nil } +func (x *ComputeTemplate) GetExtendedResources() map[string]uint32 { + if x != nil { + return x.ExtendedResources + } + return nil +} + type CreateImageTemplateRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1088,7 +1097,7 @@ var file_config_proto_rawDesc = []byte{ 0x65, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1b, 0x0a, 0x06, 0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, - 0x02, 0x52, 0x06, 0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x22, 0xf4, 0x01, 0x0a, 0x0f, 0x43, 0x6f, + 0x02, 0x52, 0x06, 0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x22, 0x98, 0x03, 0x0a, 0x0f, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x17, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, @@ -1104,162 +1113,172 @@ var file_config_proto_rawDesc = []byte{ 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x50, 0x6f, 0x64, 0x54, 0x6f, 0x6c, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0b, 0x74, 0x6f, 0x6c, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x22, 0x77, 0x0a, 0x1a, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, - 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 
0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3b, - 0x0a, 0x0e, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x49, - 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x0d, 0x69, 0x6d, - 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x6e, - 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, - 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x4b, 0x0a, 0x17, 0x47, 0x65, 0x74, - 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, - 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, - 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x39, 0x0a, 0x19, 0x4c, 0x69, 0x73, 0x74, 0x49, 0x6d, - 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, - 0x65, 0x22, 0x5b, 0x0a, 0x1a, 0x4c, 0x69, 0x73, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, - 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x3d, 0x0a, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, - 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x0e, - 0x69, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x22, 
0x1e, - 0x0a, 0x1c, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, - 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x5e, - 0x0a, 0x1d, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, - 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x3d, 0x0a, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, - 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x0e, - 0x69, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x22, 0x4e, - 0x0a, 0x1a, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, - 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, - 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, - 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0xc0, - 0x03, 0x0a, 0x0d, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, + 0x12, 0x5c, 0x0a, 0x12, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x5f, 0x72, 0x65, 0x73, + 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, + 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, + 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x11, 0x65, 0x78, 0x74, + 0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x1a, 0x44, + 0x0a, 0x16, 0x45, 0x78, 0x74, 
0x65, 0x6e, 0x64, 0x65, 0x64, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x3a, 0x02, 0x38, 0x01, 0x22, 0x77, 0x0a, 0x1a, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x49, 0x6d, + 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x3b, 0x0a, 0x0e, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, + 0x6c, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, + 0x52, 0x0d, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, + 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x4b, 0x0a, + 0x17, 0x47, 0x65, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, + 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1c, 0x0a, 0x09, + 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x39, 0x0a, 0x19, 0x4c, 0x69, + 0x73, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, + 0x70, 0x61, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, + 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x5b, 0x0a, 0x1a, 0x4c, 0x69, 
0x73, 0x74, 0x49, 0x6d, 0x61, + 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x3d, 0x0a, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, + 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, + 0x74, 0x65, 0x52, 0x0e, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, + 0x65, 0x73, 0x22, 0x1e, 0x0a, 0x1c, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x49, 0x6d, 0x61, + 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x22, 0x5e, 0x0a, 0x1d, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x49, 0x6d, 0x61, + 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x3d, 0x0a, 0x0f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, + 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, + 0x74, 0x65, 0x52, 0x0e, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, + 0x65, 0x73, 0x22, 0x4e, 0x0a, 0x1a, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, + 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, - 0x63, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x69, 0x6d, 0x61, 0x67, 0x65, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x62, 0x61, 0x73, 0x65, 0x49, 0x6d, 0x61, 0x67, - 0x65, 0x12, 
0x21, 0x0a, 0x0c, 0x70, 0x69, 0x70, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, - 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x70, 0x69, 0x70, 0x50, 0x61, 0x63, 0x6b, - 0x61, 0x67, 0x65, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x5f, 0x70, 0x61, - 0x63, 0x6b, 0x61, 0x67, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x63, 0x6f, - 0x6e, 0x64, 0x61, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x73, - 0x79, 0x73, 0x74, 0x65, 0x6d, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x73, 0x18, 0x06, - 0x20, 0x03, 0x28, 0x09, 0x52, 0x0e, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x50, 0x61, 0x63, 0x6b, - 0x61, 0x67, 0x65, 0x73, 0x12, 0x63, 0x0a, 0x15, 0x65, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, - 0x65, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x18, 0x07, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x49, 0x6d, 0x61, 0x67, - 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, - 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x45, 0x6e, - 0x74, 0x72, 0x79, 0x52, 0x14, 0x65, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, - 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x63, 0x75, 0x73, - 0x74, 0x6f, 0x6d, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x73, 0x18, 0x08, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0e, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, - 0x64, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x1a, 0x47, 0x0a, 0x19, 0x45, 0x6e, 0x76, 0x69, - 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x73, - 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 
0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, - 0x01, 0x32, 0x94, 0x06, 0x0a, 0x16, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, - 0x70, 0x6c, 0x61, 0x74, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0xa1, 0x01, 0x0a, - 0x15, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, - 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x23, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, - 0x72, 0x65, 0x61, 0x74, 0x65, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, - 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, - 0x61, 0x74, 0x65, 0x22, 0x4b, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x45, 0x22, 0x31, 0x2f, 0x61, 0x70, - 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, - 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x63, 0x6f, 0x6d, - 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x3a, 0x10, - 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, - 0x12, 0x90, 0x01, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, - 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x20, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, - 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, - 0x65, 0x22, 0x40, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x3a, 0x12, 0x38, 0x2f, 0x61, 0x70, 0x69, 0x73, - 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 
0x2f, 0x7b, - 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x75, - 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, - 0x6d, 0x65, 0x7d, 0x12, 0x9a, 0x01, 0x0a, 0x14, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6d, 0x70, - 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x12, 0x22, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, - 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x23, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6d, - 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x39, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x33, 0x12, 0x31, 0x2f, + 0x63, 0x65, 0x22, 0xc0, 0x03, 0x0a, 0x0d, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, + 0x6c, 0x61, 0x74, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, + 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, + 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x69, + 0x6d, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x62, 0x61, 0x73, 0x65, + 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x70, 0x69, 0x70, 0x5f, 0x70, 0x61, 0x63, + 0x6b, 0x61, 0x67, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x70, 0x69, 0x70, + 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x63, 0x6f, 0x6e, 0x64, + 0x61, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, + 0x52, 0x0d, 0x63, 0x6f, 0x6e, 0x64, 0x61, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x73, 0x12, + 0x27, 0x0a, 0x0f, 0x73, 
0x79, 0x73, 0x74, 0x65, 0x6d, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, + 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0e, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, + 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x73, 0x12, 0x63, 0x0a, 0x15, 0x65, 0x6e, 0x76, 0x69, + 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, + 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x45, 0x6e, + 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, + 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x14, 0x65, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, + 0x6d, 0x65, 0x6e, 0x74, 0x56, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x12, 0x27, 0x0a, + 0x0f, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x73, + 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x43, 0x6f, + 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x18, + 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x1a, 0x47, 0x0a, 0x19, + 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x56, 0x61, 0x72, 0x69, 0x61, + 0x62, 0x6c, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x3a, 0x02, 0x38, 0x01, 0x32, 0x94, 0x06, 0x0a, 0x16, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, + 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, + 0x12, 0xa1, 0x01, 0x0a, 0x15, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x43, 0x6f, 0x6d, 0x70, 0x75, + 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 
0x12, 0x23, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, + 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x16, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, + 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x4b, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x45, 0x22, + 0x31, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, + 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, + 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, + 0x65, 0x73, 0x3a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, + 0x6c, 0x61, 0x74, 0x65, 0x12, 0x90, 0x01, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, + 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x20, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, + 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, + 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x40, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x3a, 0x12, 0x38, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, - 0x12, 0x8c, 0x01, 0x0a, 0x17, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x43, 0x6f, 0x6d, 0x70, - 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x12, 0x25, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x43, 0x6f, 0x6d, 0x70, - 0x75, 
0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x26, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, - 0x41, 0x6c, 0x6c, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, - 0x74, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x22, 0x82, 0xd3, 0xe4, - 0x93, 0x02, 0x1c, 0x12, 0x1a, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x63, 0x6f, - 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x12, - 0x96, 0x01, 0x0a, 0x15, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, - 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x23, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, - 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, - 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x22, 0x40, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x3a, 0x2a, 0x38, - 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, - 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, - 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, - 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x32, 0xcc, 0x04, 0x0a, 0x14, 0x49, 0x6d, 0x61, - 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x12, 0x80, 0x01, 0x0a, 0x13, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, - 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x21, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, - 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 
0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x14, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, - 0x74, 0x65, 0x22, 0x30, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x2a, 0x22, 0x18, 0x2f, 0x61, 0x70, 0x69, - 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, - 0x61, 0x74, 0x65, 0x73, 0x3a, 0x0e, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, - 0x6c, 0x61, 0x74, 0x65, 0x12, 0x88, 0x01, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x49, 0x6d, 0x61, 0x67, - 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x1e, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2e, 0x47, 0x65, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, - 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x14, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2e, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, - 0x3e, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x38, 0x12, 0x36, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, + 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x12, 0x9a, 0x01, 0x0a, 0x14, 0x4c, 0x69, 0x73, 0x74, + 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, + 0x12, 0x22, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x43, 0x6f, 0x6d, + 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x23, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, + 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x39, 0x82, 0xd3, 0xe4, 0x93, 0x02, + 0x33, 0x12, 0x31, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, + 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, + 0x65, 0x7d, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 
0x6d, 0x70, 0x6c, + 0x61, 0x74, 0x65, 0x73, 0x12, 0x8c, 0x01, 0x0a, 0x17, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, + 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, + 0x12, 0x25, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, + 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x26, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x43, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, + 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, + 0x22, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x1c, 0x12, 0x1a, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, + 0x31, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, + 0x74, 0x65, 0x73, 0x12, 0x96, 0x01, 0x0a, 0x15, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x43, 0x6f, + 0x6d, 0x70, 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x23, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x43, 0x6f, 0x6d, 0x70, + 0x75, 0x74, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x22, 0x40, 0x82, 0xd3, 0xe4, 0x93, + 0x02, 0x3a, 0x2a, 0x38, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, + 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, + 0x63, 0x65, 0x7d, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, + 0x6c, 0x61, 0x74, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x32, 0xcc, 0x04, 0x0a, + 0x14, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x53, 0x65, + 0x72, 0x76, 0x69, 
0x63, 0x65, 0x12, 0x80, 0x01, 0x0a, 0x13, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, + 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x21, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, + 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x1a, 0x14, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, + 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x30, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x2a, 0x22, 0x18, + 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, + 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x3a, 0x0e, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, + 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x88, 0x01, 0x0a, 0x10, 0x47, 0x65, 0x74, + 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x1e, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x47, 0x65, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, + 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x14, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, + 0x61, 0x74, 0x65, 0x22, 0x3e, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x38, 0x12, 0x36, 0x2f, 0x61, 0x70, + 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, + 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x69, 0x6d, 0x61, + 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, + 0x6d, 0x65, 0x7d, 0x12, 0x92, 0x01, 0x0a, 0x12, 0x4c, 0x69, 0x73, 0x74, 0x49, 0x6d, 0x61, 0x67, + 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x12, 0x20, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, + 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 
0x65, 0x73, 0x74, 0x1a, 0x21, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, + 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, + 0x37, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x31, 0x12, 0x2f, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, - 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x12, - 0x92, 0x01, 0x0a, 0x12, 0x4c, 0x69, 0x73, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, - 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x12, 0x20, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, - 0x69, 0x73, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, - 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x21, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, - 0x74, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x37, 0x82, 0xd3, 0xe4, - 0x93, 0x02, 0x31, 0x12, 0x2f, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, + 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x12, 0x90, 0x01, 0x0a, 0x13, 0x44, 0x65, 0x6c, + 0x65, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, + 0x12, 0x21, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x49, + 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x22, 0x3e, 0x82, 0xd3, 0xe4, + 0x93, 0x02, 0x38, 0x2a, 0x36, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 
0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, - 0x61, 0x74, 0x65, 0x73, 0x12, 0x90, 0x01, 0x0a, 0x13, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x49, - 0x6d, 0x61, 0x67, 0x65, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x21, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, - 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, - 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, - 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x22, 0x3e, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x38, 0x2a, - 0x36, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, - 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, - 0x2f, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, - 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x42, 0x54, 0x5a, 0x2e, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x72, 0x61, 0x79, 0x2d, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, - 0x74, 0x2f, 0x6b, 0x75, 0x62, 0x65, 0x72, 0x61, 0x79, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, - 0x67, 0x6f, 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x92, 0x41, 0x21, 0x2a, 0x01, 0x01, 0x52, - 0x1c, 0x0a, 0x07, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x12, 0x11, 0x12, 0x0f, 0x0a, 0x0d, - 0x1a, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x62, 0x06, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x61, 0x74, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x42, 0x54, 0x5a, 0x2e, 0x67, + 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x72, 0x61, 0x79, 0x2d, 0x70, 0x72, + 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x2f, 0x6b, 0x75, 0x62, 0x65, 0x72, 0x61, 0x79, 0x2f, 0x70, 0x72, + 0x6f, 
0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x92, 0x41, 0x21, + 0x2a, 0x01, 0x01, 0x52, 0x1c, 0x0a, 0x07, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x12, 0x11, + 0x12, 0x0f, 0x0a, 0x0d, 0x1a, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -1274,7 +1293,7 @@ func file_config_proto_rawDescGZIP() []byte { return file_config_proto_rawDescData } -var file_config_proto_msgTypes = make([]protoimpl.MessageInfo, 18) +var file_config_proto_msgTypes = make([]protoimpl.MessageInfo, 19) var file_config_proto_goTypes = []interface{}{ (*CreateComputeTemplateRequest)(nil), // 0: proto.CreateComputeTemplateRequest (*GetComputeTemplateRequest)(nil), // 1: proto.GetComputeTemplateRequest @@ -1293,41 +1312,43 @@ var file_config_proto_goTypes = []interface{}{ (*ListAllImageTemplatesResponse)(nil), // 14: proto.ListAllImageTemplatesResponse (*DeleteImageTemplateRequest)(nil), // 15: proto.DeleteImageTemplateRequest (*ImageTemplate)(nil), // 16: proto.ImageTemplate - nil, // 17: proto.ImageTemplate.EnvironmentVariablesEntry - (*emptypb.Empty)(nil), // 18: google.protobuf.Empty + nil, // 17: proto.ComputeTemplate.ExtendedResourcesEntry + nil, // 18: proto.ImageTemplate.EnvironmentVariablesEntry + (*emptypb.Empty)(nil), // 19: google.protobuf.Empty } var file_config_proto_depIdxs = []int32{ 8, // 0: proto.CreateComputeTemplateRequest.compute_template:type_name -> proto.ComputeTemplate 8, // 1: proto.ListComputeTemplatesResponse.compute_templates:type_name -> proto.ComputeTemplate 8, // 2: proto.ListAllComputeTemplatesResponse.compute_templates:type_name -> proto.ComputeTemplate 7, // 3: proto.ComputeTemplate.tolerations:type_name -> proto.PodToleration - 16, // 4: proto.CreateImageTemplateRequest.image_template:type_name -> proto.ImageTemplate - 16, // 5: proto.ListImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate - 16, // 6: 
proto.ListAllImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate - 17, // 7: proto.ImageTemplate.environment_variables:type_name -> proto.ImageTemplate.EnvironmentVariablesEntry - 0, // 8: proto.ComputeTemplateService.CreateComputeTemplate:input_type -> proto.CreateComputeTemplateRequest - 1, // 9: proto.ComputeTemplateService.GetComputeTemplate:input_type -> proto.GetComputeTemplateRequest - 2, // 10: proto.ComputeTemplateService.ListComputeTemplates:input_type -> proto.ListComputeTemplatesRequest - 4, // 11: proto.ComputeTemplateService.ListAllComputeTemplates:input_type -> proto.ListAllComputeTemplatesRequest - 6, // 12: proto.ComputeTemplateService.DeleteComputeTemplate:input_type -> proto.DeleteComputeTemplateRequest - 9, // 13: proto.ImageTemplateService.CreateImageTemplate:input_type -> proto.CreateImageTemplateRequest - 10, // 14: proto.ImageTemplateService.GetImageTemplate:input_type -> proto.GetImageTemplateRequest - 11, // 15: proto.ImageTemplateService.ListImageTemplates:input_type -> proto.ListImageTemplatesRequest - 15, // 16: proto.ImageTemplateService.DeleteImageTemplate:input_type -> proto.DeleteImageTemplateRequest - 8, // 17: proto.ComputeTemplateService.CreateComputeTemplate:output_type -> proto.ComputeTemplate - 8, // 18: proto.ComputeTemplateService.GetComputeTemplate:output_type -> proto.ComputeTemplate - 3, // 19: proto.ComputeTemplateService.ListComputeTemplates:output_type -> proto.ListComputeTemplatesResponse - 5, // 20: proto.ComputeTemplateService.ListAllComputeTemplates:output_type -> proto.ListAllComputeTemplatesResponse - 18, // 21: proto.ComputeTemplateService.DeleteComputeTemplate:output_type -> google.protobuf.Empty - 16, // 22: proto.ImageTemplateService.CreateImageTemplate:output_type -> proto.ImageTemplate - 16, // 23: proto.ImageTemplateService.GetImageTemplate:output_type -> proto.ImageTemplate - 12, // 24: proto.ImageTemplateService.ListImageTemplates:output_type -> proto.ListImageTemplatesResponse - 18, 
// 25: proto.ImageTemplateService.DeleteImageTemplate:output_type -> google.protobuf.Empty - 17, // [17:26] is the sub-list for method output_type - 8, // [8:17] is the sub-list for method input_type - 8, // [8:8] is the sub-list for extension type_name - 8, // [8:8] is the sub-list for extension extendee - 0, // [0:8] is the sub-list for field type_name + 17, // 4: proto.ComputeTemplate.extended_resources:type_name -> proto.ComputeTemplate.ExtendedResourcesEntry + 16, // 5: proto.CreateImageTemplateRequest.image_template:type_name -> proto.ImageTemplate + 16, // 6: proto.ListImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate + 16, // 7: proto.ListAllImageTemplatesResponse.image_templates:type_name -> proto.ImageTemplate + 18, // 8: proto.ImageTemplate.environment_variables:type_name -> proto.ImageTemplate.EnvironmentVariablesEntry + 0, // 9: proto.ComputeTemplateService.CreateComputeTemplate:input_type -> proto.CreateComputeTemplateRequest + 1, // 10: proto.ComputeTemplateService.GetComputeTemplate:input_type -> proto.GetComputeTemplateRequest + 2, // 11: proto.ComputeTemplateService.ListComputeTemplates:input_type -> proto.ListComputeTemplatesRequest + 4, // 12: proto.ComputeTemplateService.ListAllComputeTemplates:input_type -> proto.ListAllComputeTemplatesRequest + 6, // 13: proto.ComputeTemplateService.DeleteComputeTemplate:input_type -> proto.DeleteComputeTemplateRequest + 9, // 14: proto.ImageTemplateService.CreateImageTemplate:input_type -> proto.CreateImageTemplateRequest + 10, // 15: proto.ImageTemplateService.GetImageTemplate:input_type -> proto.GetImageTemplateRequest + 11, // 16: proto.ImageTemplateService.ListImageTemplates:input_type -> proto.ListImageTemplatesRequest + 15, // 17: proto.ImageTemplateService.DeleteImageTemplate:input_type -> proto.DeleteImageTemplateRequest + 8, // 18: proto.ComputeTemplateService.CreateComputeTemplate:output_type -> proto.ComputeTemplate + 8, // 19: 
proto.ComputeTemplateService.GetComputeTemplate:output_type -> proto.ComputeTemplate + 3, // 20: proto.ComputeTemplateService.ListComputeTemplates:output_type -> proto.ListComputeTemplatesResponse + 5, // 21: proto.ComputeTemplateService.ListAllComputeTemplates:output_type -> proto.ListAllComputeTemplatesResponse + 19, // 22: proto.ComputeTemplateService.DeleteComputeTemplate:output_type -> google.protobuf.Empty + 16, // 23: proto.ImageTemplateService.CreateImageTemplate:output_type -> proto.ImageTemplate + 16, // 24: proto.ImageTemplateService.GetImageTemplate:output_type -> proto.ImageTemplate + 12, // 25: proto.ImageTemplateService.ListImageTemplates:output_type -> proto.ListImageTemplatesResponse + 19, // 26: proto.ImageTemplateService.DeleteImageTemplate:output_type -> google.protobuf.Empty + 18, // [18:27] is the sub-list for method output_type + 9, // [9:18] is the sub-list for method input_type + 9, // [9:9] is the sub-list for extension type_name + 9, // [9:9] is the sub-list for extension extendee + 0, // [0:9] is the sub-list for field type_name } func init() { file_config_proto_init() } @@ -1547,7 +1568,7 @@ func file_config_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_config_proto_rawDesc, NumEnums: 0, - NumMessages: 18, + NumMessages: 19, NumExtensions: 0, NumServices: 2, }, diff --git a/proto/go_client/config.pb.gw.go b/proto/go_client/config.pb.gw.go index 27ce40ff931..c57ef2828d1 100644 --- a/proto/go_client/config.pb.gw.go +++ b/proto/go_client/config.pb.gw.go @@ -789,7 +789,7 @@ func RegisterImageTemplateServiceHandlerServer(ctx context.Context, mux *runtime // RegisterComputeTemplateServiceHandlerFromEndpoint is same as RegisterComputeTemplateServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. 
func RegisterComputeTemplateServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.Dial(endpoint, opts...) + conn, err := grpc.NewClient(endpoint, opts...) if err != nil { return err } @@ -954,7 +954,7 @@ var ( // RegisterImageTemplateServiceHandlerFromEndpoint is same as RegisterImageTemplateServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterImageTemplateServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.Dial(endpoint, opts...) + conn, err := grpc.NewClient(endpoint, opts...) if err != nil { return err } diff --git a/proto/go_client/job.pb.go b/proto/go_client/job.pb.go index a1fc61a2d6d..fb266ef226c 100644 --- a/proto/go_client/job.pb.go +++ b/proto/go_client/job.pb.go @@ -491,6 +491,12 @@ type RayJob struct { JobDeploymentStatus string `protobuf:"bytes,15,opt,name=job_deployment_status,json=jobDeploymentStatus,proto3" json:"job_deployment_status,omitempty"` // Output. A human-readable description of the status of this operation. Message string `protobuf:"bytes,16,opt,name=message,proto3" json:"message,omitempty"` + // Output. The time when JobDeploymentStatus transitioned from 'New' to 'Initializing'. + StartTime *timestamppb.Timestamp `protobuf:"bytes,22,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` + // Output. When JobDeploymentStatus transitioned to 'Complete' status. + EndTime *timestamppb.Timestamp `protobuf:"bytes,23,opt,name=end_time,json=endTime,proto3" json:"end_time,omitempty"` + // Output. Name of the ray cluster. 
+ RayClusterName string `protobuf:"bytes,24,opt,name=ray_cluster_name,json=rayClusterName,proto3" json:"ray_cluster_name,omitempty"` } func (x *RayJob) Reset() { @@ -672,6 +678,27 @@ func (x *RayJob) GetMessage() string { return "" } +func (x *RayJob) GetStartTime() *timestamppb.Timestamp { + if x != nil { + return x.StartTime + } + return nil +} + +func (x *RayJob) GetEndTime() *timestamppb.Timestamp { + if x != nil { + return x.EndTime + } + return nil +} + +func (x *RayJob) GetRayClusterName() string { + if x != nil { + return x.RayClusterName + } + return "" +} + var File_job_proto protoreflect.FileDescriptor var file_job_proto_rawDesc = []byte{ @@ -722,7 +749,7 @@ var file_job_proto_rawDesc = []byte{ 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x05, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x63, 0x70, 0x75, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x63, 0x70, 0x75, 0x12, 0x16, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0xd9, 0x08, 0x0a, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x22, 0x84, 0x0a, 0x0a, 0x06, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x12, 0x17, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, @@ -784,57 +811,67 @@ var file_job_proto_rawDesc = []byte{ 0xe0, 0x41, 0x03, 0x52, 0x13, 0x6a, 0x6f, 0x62, 0x44, 0x65, 0x70, 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1d, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x10, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x03, 0x52, 0x07, - 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 
0x03, 0x6b, 0x65, 0x79, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, - 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x42, 0x0a, 0x14, 0x43, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x53, - 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, - 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, - 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, - 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x32, 0xba, 0x04, 0x0a, 0x0d, 0x52, 0x61, 0x79, - 0x4a, 0x6f, 0x62, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x6c, 0x0a, 0x0c, 0x43, 0x72, - 0x65, 0x61, 0x74, 0x65, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x12, 0x1a, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x2e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0d, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x52, - 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x22, 0x31, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x2b, 0x22, 0x24, 0x2f, - 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, - 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x6a, - 0x6f, 0x62, 0x73, 0x3a, 0x03, 0x6a, 0x6f, 0x62, 0x12, 0x68, 0x0a, 0x09, 0x47, 0x65, 0x74, 0x52, - 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x12, 0x17, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x47, 0x65, - 0x74, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0d, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x22, 0x33, 0x82, - 0xd3, 0xe4, 0x93, 0x02, 0x2d, 0x12, 0x2b, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, - 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, - 0x73, 
0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x6a, 0x6f, 0x62, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, - 0x65, 0x7d, 0x12, 0x72, 0x0a, 0x0b, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, - 0x73, 0x12, 0x19, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x61, - 0x79, 0x4a, 0x6f, 0x62, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1a, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x73, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x2c, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x26, - 0x12, 0x24, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, - 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, - 0x7d, 0x2f, 0x6a, 0x6f, 0x62, 0x73, 0x12, 0x64, 0x0a, 0x0e, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, - 0x6c, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x73, 0x12, 0x1c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x73, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1d, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, - 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x15, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x0f, 0x12, 0x0d, 0x2f, - 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6a, 0x6f, 0x62, 0x73, 0x12, 0x77, 0x0a, 0x0c, - 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x12, 0x1a, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x52, 0x61, 0x79, 0x4a, 0x6f, - 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, - 0x22, 0x33, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x2d, 0x2a, 0x2b, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 
0x12, 0x3e, 0x0a, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, + 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x16, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, + 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x42, 0x03, 0xe0, 0x41, 0x03, 0x52, 0x09, 0x73, 0x74, + 0x61, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x3a, 0x0a, 0x08, 0x65, 0x6e, 0x64, 0x5f, 0x74, + 0x69, 0x6d, 0x65, 0x18, 0x17, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, + 0x73, 0x74, 0x61, 0x6d, 0x70, 0x42, 0x03, 0xe0, 0x41, 0x03, 0x52, 0x07, 0x65, 0x6e, 0x64, 0x54, + 0x69, 0x6d, 0x65, 0x12, 0x2d, 0x0a, 0x10, 0x72, 0x61, 0x79, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, + 0x41, 0x03, 0x52, 0x0e, 0x72, 0x61, 0x79, 0x43, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x4e, 0x61, + 0x6d, 0x65, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, + 0x42, 0x0a, 0x14, 0x43, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x32, 0xba, 0x04, 0x0a, 0x0d, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x6c, 0x0a, 0x0c, 0x43, 0x72, 0x65, 0x61, 
0x74, 0x65, 0x52, + 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x12, 0x1a, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x72, + 0x65, 0x61, 0x74, 0x65, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x0d, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, + 0x22, 0x31, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x2b, 0x22, 0x24, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, - 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x6a, 0x6f, 0x62, 0x73, 0x2f, 0x7b, - 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x42, 0x54, 0x5a, 0x2e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, - 0x63, 0x6f, 0x6d, 0x2f, 0x72, 0x61, 0x79, 0x2d, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x2f, - 0x6b, 0x75, 0x62, 0x65, 0x72, 0x61, 0x79, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x67, 0x6f, - 0x5f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x92, 0x41, 0x21, 0x2a, 0x01, 0x01, 0x52, 0x1c, 0x0a, - 0x07, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x12, 0x11, 0x12, 0x0f, 0x0a, 0x0d, 0x1a, 0x0b, - 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x33, + 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x6a, 0x6f, 0x62, 0x73, 0x3a, 0x03, + 0x6a, 0x6f, 0x62, 0x12, 0x68, 0x0a, 0x09, 0x47, 0x65, 0x74, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, + 0x12, 0x17, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x79, 0x4a, + 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0d, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2e, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x22, 0x33, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x2d, + 0x12, 0x2b, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, + 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, + 0x7d, 0x2f, 0x6a, 0x6f, 0x62, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x12, 0x72, 0x0a, + 
0x0b, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x73, 0x12, 0x19, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1a, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x4c, 0x69, 0x73, 0x74, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x22, 0x2c, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x26, 0x12, 0x24, 0x2f, 0x61, 0x70, + 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, + 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x6a, 0x6f, 0x62, + 0x73, 0x12, 0x64, 0x0a, 0x0e, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, 0x6c, 0x52, 0x61, 0x79, 0x4a, + 0x6f, 0x62, 0x73, 0x12, 0x1c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, + 0x41, 0x6c, 0x6c, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x1d, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6c, + 0x6c, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x22, 0x15, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x0f, 0x12, 0x0d, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, + 0x76, 0x31, 0x2f, 0x6a, 0x6f, 0x62, 0x73, 0x12, 0x77, 0x0a, 0x0c, 0x44, 0x65, 0x6c, 0x65, 0x74, + 0x65, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x12, 0x1a, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x52, 0x61, 0x79, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x22, 0x33, 0x82, 0xd3, 0xe4, + 0x93, 0x02, 0x2d, 0x2a, 0x2b, 0x2f, 0x61, 0x70, 0x69, 0x73, 0x2f, 0x76, 0x31, 0x2f, 0x6e, 0x61, + 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, + 0x61, 0x63, 0x65, 0x7d, 0x2f, 0x6a, 
0x6f, 0x62, 0x73, 0x2f, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, + 0x42, 0x54, 0x5a, 0x2e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x72, + 0x61, 0x79, 0x2d, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x2f, 0x6b, 0x75, 0x62, 0x65, 0x72, + 0x61, 0x79, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x5f, 0x63, 0x6c, 0x69, 0x65, + 0x6e, 0x74, 0x92, 0x41, 0x21, 0x2a, 0x01, 0x01, 0x52, 0x1c, 0x0a, 0x07, 0x64, 0x65, 0x66, 0x61, + 0x75, 0x6c, 0x74, 0x12, 0x11, 0x12, 0x0f, 0x0a, 0x0d, 0x1a, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, + 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -876,21 +913,23 @@ var file_job_proto_depIdxs = []int32{ 7, // 6: proto.RayJob.jobSubmitter:type_name -> proto.RayJobSubmitter 12, // 7: proto.RayJob.created_at:type_name -> google.protobuf.Timestamp 12, // 8: proto.RayJob.delete_at:type_name -> google.protobuf.Timestamp - 0, // 9: proto.RayJobService.CreateRayJob:input_type -> proto.CreateRayJobRequest - 1, // 10: proto.RayJobService.GetRayJob:input_type -> proto.GetRayJobRequest - 2, // 11: proto.RayJobService.ListRayJobs:input_type -> proto.ListRayJobsRequest - 4, // 12: proto.RayJobService.ListAllRayJobs:input_type -> proto.ListAllRayJobsRequest - 6, // 13: proto.RayJobService.DeleteRayJob:input_type -> proto.DeleteRayJobRequest - 8, // 14: proto.RayJobService.CreateRayJob:output_type -> proto.RayJob - 8, // 15: proto.RayJobService.GetRayJob:output_type -> proto.RayJob - 3, // 16: proto.RayJobService.ListRayJobs:output_type -> proto.ListRayJobsResponse - 5, // 17: proto.RayJobService.ListAllRayJobs:output_type -> proto.ListAllRayJobsResponse - 13, // 18: proto.RayJobService.DeleteRayJob:output_type -> google.protobuf.Empty - 14, // [14:19] is the sub-list for method output_type - 9, // [9:14] is the sub-list for method input_type - 9, // [9:9] is the sub-list for extension type_name - 9, // [9:9] is the sub-list for extension extendee - 0, // [0:9] is the sub-list for field 
type_name + 12, // 9: proto.RayJob.start_time:type_name -> google.protobuf.Timestamp + 12, // 10: proto.RayJob.end_time:type_name -> google.protobuf.Timestamp + 0, // 11: proto.RayJobService.CreateRayJob:input_type -> proto.CreateRayJobRequest + 1, // 12: proto.RayJobService.GetRayJob:input_type -> proto.GetRayJobRequest + 2, // 13: proto.RayJobService.ListRayJobs:input_type -> proto.ListRayJobsRequest + 4, // 14: proto.RayJobService.ListAllRayJobs:input_type -> proto.ListAllRayJobsRequest + 6, // 15: proto.RayJobService.DeleteRayJob:input_type -> proto.DeleteRayJobRequest + 8, // 16: proto.RayJobService.CreateRayJob:output_type -> proto.RayJob + 8, // 17: proto.RayJobService.GetRayJob:output_type -> proto.RayJob + 3, // 18: proto.RayJobService.ListRayJobs:output_type -> proto.ListRayJobsResponse + 5, // 19: proto.RayJobService.ListAllRayJobs:output_type -> proto.ListAllRayJobsResponse + 13, // 20: proto.RayJobService.DeleteRayJob:output_type -> google.protobuf.Empty + 16, // [16:21] is the sub-list for method output_type + 11, // [11:16] is the sub-list for method input_type + 11, // [11:11] is the sub-list for extension type_name + 11, // [11:11] is the sub-list for extension extendee + 0, // [0:11] is the sub-list for field type_name } func init() { file_job_proto_init() } diff --git a/proto/go_client/job.pb.gw.go b/proto/go_client/job.pb.gw.go index 7a90b31de8a..5cd31a6d38c 100644 --- a/proto/go_client/job.pb.gw.go +++ b/proto/go_client/job.pb.gw.go @@ -440,7 +440,7 @@ func RegisterRayJobServiceHandlerServer(ctx context.Context, mux *runtime.ServeM // RegisterRayJobServiceHandlerFromEndpoint is same as RegisterRayJobServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterRayJobServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.Dial(endpoint, opts...) + conn, err := grpc.NewClient(endpoint, opts...) 
if err != nil { return err } diff --git a/proto/go_client/job_submission.pb.gw.go b/proto/go_client/job_submission.pb.gw.go index eafd5e65d1e..4235d661396 100644 --- a/proto/go_client/job_submission.pb.gw.go +++ b/proto/go_client/job_submission.pb.gw.go @@ -709,7 +709,7 @@ func RegisterRayJobSubmissionServiceHandlerServer(ctx context.Context, mux *runt // RegisterRayJobSubmissionServiceHandlerFromEndpoint is same as RegisterRayJobSubmissionServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterRayJobSubmissionServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.Dial(endpoint, opts...) + conn, err := grpc.NewClient(endpoint, opts...) if err != nil { return err } diff --git a/proto/go_client/serve.pb.gw.go b/proto/go_client/serve.pb.gw.go index e6a734eab27..90d9c722862 100644 --- a/proto/go_client/serve.pb.gw.go +++ b/proto/go_client/serve.pb.gw.go @@ -587,7 +587,7 @@ func RegisterRayServeServiceHandlerServer(ctx context.Context, mux *runtime.Serv // RegisterRayServeServiceHandlerFromEndpoint is same as RegisterRayServeServiceHandler but // automatically dials to "endpoint" and closes the connection when "ctx" gets done. func RegisterRayServeServiceHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { - conn, err := grpc.Dial(endpoint, opts...) + conn, err := grpc.NewClient(endpoint, opts...) if err != nil { return err } diff --git a/proto/job.proto b/proto/job.proto index 9d4dd737d3a..bc08efc35a8 100644 --- a/proto/job.proto +++ b/proto/job.proto @@ -78,7 +78,7 @@ message GetRayJobRequest { } message ListRayJobsRequest { - // Required. The namespace of the job to be retrieved. + // Required. The namespace of the job to be retrieved. 
string namespace = 1 [(google.api.field_behavior) = REQUIRED]; // TODO: support paganation later } @@ -134,7 +134,7 @@ message RayJob { bool shutdown_after_job_finishes = 8; // Optional. The label selectors to choose exiting clusters. If not specified, cluster_spec must be set. map cluster_selector = 9; - // Optional. The cluster template, required if the cluster_selector is not specified. + // Optional. The cluster template, required if the cluster_selector is not specified. ClusterSpec cluster_spec = 10; // Optional. TTLSecondsAfterFinished is the TTL to clean up RayCluster. int32 ttl_seconds_after_finished = 11; @@ -144,12 +144,12 @@ message RayJob { float entrypointNumCpus = 18; // Optional entrypointNumGpus specifies the number of gpus to reserve for the entrypoint command. float entrypointNumGpus = 19; - // Optional entrypointResources specifies the custom resources and quantities to reserve + // Optional entrypointResources specifies the custom resources and quantities to reserve // for the entrypoint command. string entrypointResources = 20; // Output. The time that the job created. google.protobuf.Timestamp created_at = 12 [(google.api.field_behavior) = OUTPUT_ONLY]; - // Output. The time that the job deleted. + // Output. The time that the job deleted. google.protobuf.Timestamp delete_at = 13 [(google.api.field_behavior) = OUTPUT_ONLY]; // Output. The current job status string job_status = 14 [(google.api.field_behavior) = OUTPUT_ONLY]; @@ -157,4 +157,10 @@ message RayJob { string job_deployment_status = 15 [(google.api.field_behavior) = OUTPUT_ONLY]; // Output. A human-readable description of the status of this operation. string message = 16 [(google.api.field_behavior) = OUTPUT_ONLY]; + // Output. The time when JobDeploymentStatus transitioned from 'New' to 'Initializing'. + google.protobuf.Timestamp start_time = 22 [(google.api.field_behavior) = OUTPUT_ONLY]; + // Output. When JobDeploymentStatus transitioned to 'Complete' status. 
+ google.protobuf.Timestamp end_time = 23 [(google.api.field_behavior) = OUTPUT_ONLY]; + // Output. Name of the ray cluster. + string ray_cluster_name = 24 [(google.api.field_behavior) = OUTPUT_ONLY]; } diff --git a/proto/job_submission.proto b/proto/job_submission.proto index e3e02c78ce5..a4ad64ceedb 100644 --- a/proto/job_submission.proto +++ b/proto/job_submission.proto @@ -176,4 +176,4 @@ message JobSubmissionInfo{ map metadata = 9; // The runtime environment for the job map runtime_env = 10; -} \ No newline at end of file +} diff --git a/proto/kuberay_api.swagger.json b/proto/kuberay_api.swagger.json index 201150190a0..0bcf0027ef8 100644 --- a/proto/kuberay_api.swagger.json +++ b/proto/kuberay_api.swagger.json @@ -971,7 +971,7 @@ "FILE" ], "default": "DIRECTORY", - "description": "If indicate hostpath, we need to let user indicate which type \nthey would like to use." + "description": "If indicate hostpath, we need to let user indicate which type\nthey would like to use." }, "VolumeMountPropagationMode": { "type": "string", @@ -1053,6 +1053,26 @@ } } }, + "protoCapabilities": { + "type": "object", + "properties": { + "add": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Optional. Added capabilities" + }, + "drop": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Optional. Removed capabilities" + } + }, + "description": "Adds and removes POSIX capabilities from running containers." + }, "protoCluster": { "type": "object", "properties": { @@ -1307,6 +1327,14 @@ "type": "string" }, "title": "Optional. Labels for the head pod" + }, + "imagePullPolicy": { + "type": "string", + "title": "Optional image pull policy We only support Always and ifNotPresent" + }, + "securityContext": { + "$ref": "#/definitions/protoSecurityContext", + "description": "Optional. Configure the security context for the head container for debugging etc." 
} }, "title": "Cluster HeadGroup specification", @@ -1341,6 +1369,20 @@ } } }, + "protoSecurityContext": { + "type": "object", + "properties": { + "capabilities": { + "$ref": "#/definitions/protoCapabilities", + "description": "Optional. The capabilities to add/drop when running containers." + }, + "privileged": { + "type": "boolean", + "description": "Optional. Run container in privileged mode - essentially equivalent to root on the host. Default is false." + } + }, + "description": "SecurityContext holds security configuration that will be applied to a container.\nSome fields are present in both SecurityContext and PodSecurityContext. When both\nare set, the values in SecurityContext take precedence." + }, "protoVolume": { "type": "object", "properties": { @@ -1467,6 +1509,14 @@ "type": "string" }, "title": "Optional. Labels for the worker pod" + }, + "imagePullPolicy": { + "type": "string", + "title": "Optional image pull policy We only support Always and ifNotPresent" + }, + "securityContext": { + "$ref": "#/definitions/protoSecurityContext", + "description": "Optional. Configure the security context for the worker container for debugging etc." } }, "required": [ @@ -1540,6 +1590,14 @@ "$ref": "#/definitions/protoPodToleration" }, "title": "Optional pod tolerations" + }, + "extendedResources": { + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + }, + "title": "Optional. Name and number of the extended resources" } }, "title": "ComputeTemplate can be reused by any compute units like worker group, workspace, image build job, etc", @@ -1789,7 +1847,7 @@ }, "entrypointResources": { "type": "string", - "description": "Optional entrypointResources specifies the custom resources and quantities to reserve \nfor the entrypoint command." + "description": "Optional entrypointResources specifies the custom resources and quantities to reserve\nfor the entrypoint command." 
}, "createdAt": { "type": "string", @@ -1817,6 +1875,23 @@ "type": "string", "description": "Output. A human-readable description of the status of this operation.", "readOnly": true + }, + "startTime": { + "type": "string", + "format": "date-time", + "description": "Output. The time when JobDeploymentStatus transitioned from 'New' to 'Initializing'.", + "readOnly": true + }, + "endTime": { + "type": "string", + "format": "date-time", + "description": "Output. When JobDeploymentStatus transitioned to 'Complete' status.", + "readOnly": true + }, + "rayClusterName": { + "type": "string", + "description": "Output. Name of the ray cluster.", + "readOnly": true } }, "title": "RayJob definition", diff --git a/proto/serve.proto b/proto/serve.proto index 28e84304c05..00e880166f0 100644 --- a/proto/serve.proto +++ b/proto/serve.proto @@ -63,7 +63,7 @@ service RayServeService { }; } - // Deletes a ray service by its name and namespace + // Deletes a ray service by its name and namespace rpc DeleteRayService(DeleteRayServiceRequest) returns (google.protobuf.Empty) { option (google.api.http) = { delete: "/apis/v1/namespaces/{namespace}/services/{name}" @@ -109,7 +109,7 @@ message ListRayServicesRequest { message ListRayServicesResponse { // List of services - repeated RayService services = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; + repeated RayService services = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; // The total number of RayServices for the given query. int32 total_size = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; // The token to list the next page of RayServices. @@ -129,7 +129,7 @@ message ListAllRayServicesRequest { message ListAllRayServicesResponse { // A list of services. - repeated RayService services = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; + repeated RayService services = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; // The total number of RayServices for the given query. 
int32 total_size = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; // The token to list the next page of RayServices. @@ -164,7 +164,7 @@ message RayService { RayServiceStatus ray_service_status = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; // Output. The time that the ray service created. google.protobuf.Timestamp created_at = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; - // Output. The time that the ray service deleted. + // Output. The time that the ray service deleted. google.protobuf.Timestamp delete_at = 8 [(google.api.field_behavior) = OUTPUT_ONLY]; } @@ -213,7 +213,7 @@ message RayServiceEvent { string id = 1; // Output. Human readable name for event. string name = 2; - // Output. The creation time of the event. + // Output. The creation time of the event. google.protobuf.Timestamp created_at = 3; // Output. The last time the event occur. google.protobuf.Timestamp first_timestamp = 4; diff --git a/proto/swagger/cluster.swagger.json b/proto/swagger/cluster.swagger.json index 812b7903d1e..ae82c51caa2 100644 --- a/proto/swagger/cluster.swagger.json +++ b/proto/swagger/cluster.swagger.json @@ -229,7 +229,7 @@ "FILE" ], "default": "DIRECTORY", - "description": "If indicate hostpath, we need to let user indicate which type \nthey would like to use." + "description": "If indicate hostpath, we need to let user indicate which type\nthey would like to use." }, "VolumeMountPropagationMode": { "type": "string", @@ -311,6 +311,26 @@ } } }, + "protoCapabilities": { + "type": "object", + "properties": { + "add": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Optional. Added capabilities" + }, + "drop": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Optional. Removed capabilities" + } + }, + "description": "Adds and removes POSIX capabilities from running containers." + }, "protoCluster": { "type": "object", "properties": { @@ -565,6 +585,14 @@ "type": "string" }, "title": "Optional. 
Labels for the head pod" + }, + "imagePullPolicy": { + "type": "string", + "title": "Optional image pull policy We only support Always and ifNotPresent" + }, + "securityContext": { + "$ref": "#/definitions/protoSecurityContext", + "description": "Optional. Configure the security context for the head container for debugging etc." } }, "title": "Cluster HeadGroup specification", @@ -599,6 +627,20 @@ } } }, + "protoSecurityContext": { + "type": "object", + "properties": { + "capabilities": { + "$ref": "#/definitions/protoCapabilities", + "description": "Optional. The capabilities to add/drop when running containers." + }, + "privileged": { + "type": "boolean", + "description": "Optional. Run container in privileged mode - essentially equivalent to root on the host. Default is false." + } + }, + "description": "SecurityContext holds security configuration that will be applied to a container.\nSome fields are present in both SecurityContext and PodSecurityContext. When both\nare set, the values in SecurityContext take precedence." + }, "protoVolume": { "type": "object", "properties": { @@ -725,6 +767,14 @@ "type": "string" }, "title": "Optional. Labels for the worker pod" + }, + "imagePullPolicy": { + "type": "string", + "title": "Optional image pull policy We only support Always and ifNotPresent" + }, + "securityContext": { + "$ref": "#/definitions/protoSecurityContext", + "description": "Optional. Configure the security context for the worker container for debugging etc." } }, "required": [ diff --git a/proto/swagger/config.swagger.json b/proto/swagger/config.swagger.json index b359e2288ec..7548300d108 100644 --- a/proto/swagger/config.swagger.json +++ b/proto/swagger/config.swagger.json @@ -409,6 +409,14 @@ "$ref": "#/definitions/protoPodToleration" }, "title": "Optional pod tolerations" + }, + "extendedResources": { + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "int64" + }, + "title": "Optional. 
Name and number of the extended resources" } }, "title": "ComputeTemplate can be reused by any compute units like worker group, workspace, image build job, etc", diff --git a/proto/swagger/job.swagger.json b/proto/swagger/job.swagger.json index db7cc5e2294..809305ea46d 100644 --- a/proto/swagger/job.swagger.json +++ b/proto/swagger/job.swagger.json @@ -218,7 +218,7 @@ "FILE" ], "default": "DIRECTORY", - "description": "If indicate hostpath, we need to let user indicate which type \nthey would like to use." + "description": "If indicate hostpath, we need to let user indicate which type\nthey would like to use." }, "VolumeMountPropagationMode": { "type": "string", @@ -300,6 +300,26 @@ } } }, + "protoCapabilities": { + "type": "object", + "properties": { + "add": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Optional. Added capabilities" + }, + "drop": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Optional. Removed capabilities" + } + }, + "description": "Adds and removes POSIX capabilities from running containers." + }, "protoClusterSpec": { "type": "object", "properties": { @@ -423,6 +443,14 @@ "type": "string" }, "title": "Optional. Labels for the head pod" + }, + "imagePullPolicy": { + "type": "string", + "title": "Optional image pull policy We only support Always and ifNotPresent" + }, + "securityContext": { + "$ref": "#/definitions/protoSecurityContext", + "description": "Optional. Configure the security context for the head container for debugging etc." } }, "title": "Cluster HeadGroup specification", @@ -544,7 +572,7 @@ }, "entrypointResources": { "type": "string", - "description": "Optional entrypointResources specifies the custom resources and quantities to reserve \nfor the entrypoint command." + "description": "Optional entrypointResources specifies the custom resources and quantities to reserve\nfor the entrypoint command." 
}, "createdAt": { "type": "string", @@ -572,6 +600,23 @@ "type": "string", "description": "Output. A human-readable description of the status of this operation.", "readOnly": true + }, + "startTime": { + "type": "string", + "format": "date-time", + "description": "Output. The time when JobDeploymentStatus transitioned from 'New' to 'Initializing'.", + "readOnly": true + }, + "endTime": { + "type": "string", + "format": "date-time", + "description": "Output. When JobDeploymentStatus transitioned to 'Complete' status.", + "readOnly": true + }, + "rayClusterName": { + "type": "string", + "description": "Output. Name of the ray cluster.", + "readOnly": true } }, "title": "RayJob definition", @@ -606,6 +651,20 @@ "image" ] }, + "protoSecurityContext": { + "type": "object", + "properties": { + "capabilities": { + "$ref": "#/definitions/protoCapabilities", + "description": "Optional. The capabilities to add/drop when running containers." + }, + "privileged": { + "type": "boolean", + "description": "Optional. Run container in privileged mode - essentially equivalent to root on the host. Default is false." + } + }, + "description": "SecurityContext holds security configuration that will be applied to a container.\nSome fields are present in both SecurityContext and PodSecurityContext. When both\nare set, the values in SecurityContext take precedence." + }, "protoVolume": { "type": "object", "properties": { @@ -732,6 +791,14 @@ "type": "string" }, "title": "Optional. Labels for the worker pod" + }, + "imagePullPolicy": { + "type": "string", + "title": "Optional image pull policy We only support Always and ifNotPresent" + }, + "securityContext": { + "$ref": "#/definitions/protoSecurityContext", + "description": "Optional. Configure the security context for the worker container for debugging etc." 
} }, "required": [ diff --git a/proto/swagger/serve.swagger.json b/proto/swagger/serve.swagger.json index a8c857ac2af..7249c85946d 100644 --- a/proto/swagger/serve.swagger.json +++ b/proto/swagger/serve.swagger.json @@ -296,7 +296,7 @@ "FILE" ], "default": "DIRECTORY", - "description": "If indicate hostpath, we need to let user indicate which type \nthey would like to use." + "description": "If indicate hostpath, we need to let user indicate which type\nthey would like to use." }, "VolumeMountPropagationMode": { "type": "string", @@ -378,6 +378,26 @@ } } }, + "protoCapabilities": { + "type": "object", + "properties": { + "add": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Optional. Added capabilities" + }, + "drop": { + "type": "array", + "items": { + "type": "string" + }, + "title": "Optional. Removed capabilities" + } + }, + "description": "Adds and removes POSIX capabilities from running containers." + }, "protoClusterSpec": { "type": "object", "properties": { @@ -501,6 +521,14 @@ "type": "string" }, "title": "Optional. Labels for the head pod" + }, + "imagePullPolicy": { + "type": "string", + "title": "Optional image pull policy We only support Always and ifNotPresent" + }, + "securityContext": { + "$ref": "#/definitions/protoSecurityContext", + "description": "Optional. Configure the security context for the head container for debugging etc." } }, "title": "Cluster HeadGroup specification", @@ -724,6 +752,20 @@ } } }, + "protoSecurityContext": { + "type": "object", + "properties": { + "capabilities": { + "$ref": "#/definitions/protoCapabilities", + "description": "Optional. The capabilities to add/drop when running containers." + }, + "privileged": { + "type": "boolean", + "description": "Optional. Run container in privileged mode - essentially equivalent to root on the host. Default is false." 
+ } + }, + "description": "SecurityContext holds security configuration that will be applied to a container.\nSome fields are present in both SecurityContext and PodSecurityContext. When both\nare set, the values in SecurityContext take precedence." + }, "protoServeApplicationStatus": { "type": "object", "properties": { @@ -891,6 +933,14 @@ "type": "string" }, "title": "Optional. Labels for the worker pod" + }, + "imagePullPolicy": { + "type": "string", + "title": "Optional image pull policy We only support Always and ifNotPresent" + }, + "securityContext": { + "$ref": "#/definitions/protoSecurityContext", + "description": "Optional. Configure the security context for the worker container for debugging etc." } }, "required": [ diff --git a/proto/third_party/google/api/annotations.proto b/proto/third_party/google/api/annotations.proto index 8ff42098404..efdab3db6ca 100644 --- a/proto/third_party/google/api/annotations.proto +++ b/proto/third_party/google/api/annotations.proto @@ -28,4 +28,4 @@ option objc_class_prefix = "GAPI"; extend google.protobuf.MethodOptions { // See `HttpRule`. HttpRule http = 72295728; -} \ No newline at end of file +} diff --git a/proto/third_party/google/api/field_behavior.proto b/proto/third_party/google/api/field_behavior.proto index b47136fdf43..c4abe3b670f 100644 --- a/proto/third_party/google/api/field_behavior.proto +++ b/proto/third_party/google/api/field_behavior.proto @@ -87,4 +87,4 @@ enum FieldBehavior { // a non-empty value will be returned. The user will not be aware of what // non-empty value to expect. NON_EMPTY_DEFAULT = 7; -} \ No newline at end of file +} diff --git a/proto/third_party/google/api/http.proto b/proto/third_party/google/api/http.proto index 7d0b228cc58..113fa936a09 100644 --- a/proto/third_party/google/api/http.proto +++ b/proto/third_party/google/api/http.proto @@ -372,4 +372,4 @@ message CustomHttpPattern { // The path matched by this custom verb. 
string path = 2; -} \ No newline at end of file +} diff --git a/proto/third_party/protoc-gen-openapiv2/options/annotations.proto b/proto/third_party/protoc-gen-openapiv2/options/annotations.proto index d925360a15f..1c189e2065c 100644 --- a/proto/third_party/protoc-gen-openapiv2/options/annotations.proto +++ b/proto/third_party/protoc-gen-openapiv2/options/annotations.proto @@ -41,4 +41,4 @@ extend google.protobuf.FieldOptions { // All IDs are the same, as assigned. It is okay that they are the same, as they extend // different descriptor messages. JSONSchema openapiv2_field = 1042; -} \ No newline at end of file +} diff --git a/proto/third_party/protoc-gen-openapiv2/options/openapiv2.proto b/proto/third_party/protoc-gen-openapiv2/options/openapiv2.proto index 7be1fb572c7..b21258d3fc8 100644 --- a/proto/third_party/protoc-gen-openapiv2/options/openapiv2.proto +++ b/proto/third_party/protoc-gen-openapiv2/options/openapiv2.proto @@ -44,32 +44,32 @@ enum Scheme { // message Swagger { // Specifies the OpenAPI Specification version being used. It can be - // used by the OpenAPI UI and other clients to interpret the API listing. The + // used by the OpenAPI UI and other clients to interpret the API listing. The // value MUST be "2.0". string swagger = 1; - // Provides metadata about the API. The metadata can be used by the + // Provides metadata about the API. The metadata can be used by the // clients if needed. Info info = 2; - // The host (name or ip) serving the API. This MUST be the host only and does + // The host (name or ip) serving the API. This MUST be the host only and does // not include the scheme nor sub-paths. It MAY include a port. If the host is // not included, the host serving the documentation is to be used (including // the port). The host does not support path templating. string host = 3; // The base path on which the API is served, which is relative to the host. If - // it is not included, the API is served directly under the host. 
The value + // it is not included, the API is served directly under the host. The value // MUST start with a leading slash (/). The basePath does not support path // templating. - // Note that using `base_path` does not change the endpoint paths that are + // Note that using `base_path` does not change the endpoint paths that are // generated in the resulting OpenAPI file. If you wish to use `base_path` - // with relatively generated OpenAPI paths, the `base_path` prefix must be - // manually removed from your `google.api.http` paths and your code changed to + // with relatively generated OpenAPI paths, the `base_path` prefix must be + // manually removed from your `google.api.http` paths and your code changed to // serve the API from the `base_path`. string base_path = 4; // The transfer protocol of the API. Values MUST be from the list: "http", // "https", "ws", "wss". If the schemes is not included, the default scheme to // be used is the one used to access the OpenAPI definition itself. repeated Scheme schemes = 5; - // A list of MIME types the APIs can consume. This is global to all APIs but + // A list of MIME types the APIs can consume. This is global to all APIs but // can be overridden on specific API calls. Value MUST be as described under // Mime Types. repeated string consumes = 6; @@ -88,8 +88,8 @@ message Swagger { // Security scheme definitions that can be used across the specification. SecurityDefinitions security_definitions = 11; // A declaration of which security schemes are applied for the API as a whole. - // The list of values describes alternative security schemes that can be used - // (that is, there is a logical OR between the security requirements). + // The list of values describes alternative security schemes that can be used + // (that is, there is a logical OR between the security requirements). // Individual operations can override this definition. 
repeated SecurityRequirement security = 12; // field 13 is reserved for 'tags', which are supposed to be exposed as and @@ -440,12 +440,12 @@ message JSONSchema { // This property is the same for 2.0 and 3.0.0 https://github.com/OAI/OpenAPI-Specification/blob/3.0.0/versions/3.0.0.md#schemaObject https://github.com/OAI/OpenAPI-Specification/blob/3.0.0/versions/2.0.md#schemaObject string example = 9; double multiple_of = 10; - // Maximum represents an inclusive upper limit for a numeric instance. The - // value of MUST be a number, + // Maximum represents an inclusive upper limit for a numeric instance. The + // value of MUST be a number, double maximum = 11; bool exclusive_maximum = 12; - // minimum represents an inclusive lower limit for a numeric instance. The - // value of MUST be a number, + // minimum represents an inclusive lower limit for a numeric instance. The + // value of MUST be a number, double minimum = 13; bool exclusive_minimum = 14; uint64 max_length = 15; @@ -493,7 +493,7 @@ message JSONSchema { repeated JSONSchemaSimpleTypes type = 35; // `Format` string format = 36; - // following fields are reserved, as the properties have been omitted from + // following fields are reserved, as the properties have been omitted from // OpenAPI v2: contentMediaType, contentEncoding, if, then, else reserved 37 to 41; // field 42 is reserved for 'allOf', but in OpenAPI-specific way. @@ -521,7 +521,7 @@ message Tag { // global Tag object, then use that name to reference the tag throughout the // OpenAPI file. reserved 1; - // A short description for the tag. GFM syntax can be used for rich text + // A short description for the tag. GFM syntax can be used for rich text // representation. string description = 2; // Additional external documentation for this tag. 
diff --git a/ray-operator/.gitignore b/ray-operator/.gitignore index 82beae60578..b961b7a98ce 100644 --- a/ray-operator/.gitignore +++ b/ray-operator/.gitignore @@ -5,4 +5,4 @@ __debug_bin config/manager/my-kustomization.yaml -config/manager/my-manager.yaml \ No newline at end of file +config/manager/my-manager.yaml diff --git a/ray-operator/DEVELOPMENT.md b/ray-operator/DEVELOPMENT.md index bd294bc2f25..7c09226729f 100644 --- a/ray-operator/DEVELOPMENT.md +++ b/ray-operator/DEVELOPMENT.md @@ -18,14 +18,14 @@ The instructions assume you have access to a running Kubernetes cluster via `kub For local development, we recommend using [Kind](https://kind.sigs.k8s.io/) to create a Kubernetes cluster. -### Use go v1.20 +### Use go v1.22 -Currently, KubeRay uses go v1.20 for development. +Currently, KubeRay uses go v1.22 for development. ```bash -go install golang.org/dl/go1.20.11@latest -go1.20.11 download -export GOROOT=$(go1.20.11 env GOROOT) +go install golang.org/dl/go1.22.4@latest +go1.22.4 download +export GOROOT=$(go1.22.4 env GOROOT) export PATH="$GOROOT/bin:$PATH" ``` @@ -59,12 +59,15 @@ make clean ### End-to-end local development process on Kind +#### Run the operator inside the cluster + ```bash # Step 1: Create a Kind cluster kind create cluster --image=kindest/node:v1.24.0 # Step 2: Modify KubeRay source code -# For example, add a log "Hello KubeRay" in the function `Reconcile` in `raycluster_controller.go`. +# For example, add a log by adding setupLog.Info("Hello KubeRay") in the function `main` in `main.go`. + # Step 3: Build an image # This command will copy the source code directory into the image, and build it. 
@@ -89,7 +92,7 @@ helm install kuberay-operator --set image.repository=kuberay/operator --set imag # Step 7: Check the log of KubeRay operator kubectl logs {YOUR_OPERATOR_POD} | grep "Hello KubeRay" -# 2022-12-09T04:41:59.946Z INFO controllers.RayCluster Hello KubeRay +# {"level":"info","ts":"2024-12-25T11:08:07.046Z","logger":"setup","msg":"Hello KubeRay"} # ... ``` @@ -97,6 +100,24 @@ kubectl logs {YOUR_OPERATOR_POD} | grep "Hello KubeRay" * The command `make docker-build` (Step 3) will also run `make test` (unit tests). * Step 6 also installs the custom resource definitions (CRDs) used by the KubeRay operator. +#### Run the operator outside the cluster + +> Note: Running the operator outside the cluster allows you to debug the operator using your IDE. For example, you can set breakpoints in the code and inspect the state of the operator. + +```bash +# Step 1: Create a Kind cluster +kind create cluster --image=kindest/node:v1.24.0 + +# Step 2: Install CRDs +make -C ray-operator install + +# Step 3: Compile the source code +make -C ray-operator build + +# Step 4: Run the KubeRay operator +./ray-operator/bin/manager -leader-election-namespace default -use-kubernetes-proxy +``` + ### Running the tests The unit tests can be run by executing the following command: @@ -180,43 +201,14 @@ helm uninstall kuberay-operator; helm install kuberay-operator --set image.repos > Note: remember to replace with your own image -## CI/CD - -### Linting - -KubeRay uses the gofumpt linter. +## pre-commit hooks -Download gofumpt version **0.5.0**. At the time of writing, v0.5.0 is the latest version compatible with go1.20. Run this command to download it: +1. Install [golangci-lint](https://github.com/golangci/golangci-lint/releases). +2. Install [kubeconform](https://github.com/yannh/kubeconform/releases). +3. Install [pre-commit](https://pre-commit.com/). +4. Run `pre-commit install` to install the pre-commit hooks. 
-```bash -go install mvdan.cc/gofumpt@v0.5.0 -``` - -As a backup, [here’s the link to the source](https://github.com/mvdan/gofumpt/releases/tag/v0.2.1) (if you installed gofumpt with `go install`, you don’t need this). - -Check that the `gofumpt` version is 0.5.0: - -```bash -gofumpt --version -# v0.5.0 (go1.19) -``` - -Make sure your `go` version is still 1.20: - -```bash -go version -# go version go1.20 darwin/amd64 -``` - -If your `go` version isn’t 1.20 any more, you may have installed a different `gofumpt` version (e.g. by downloading with Homebrew). If you accidentally installed `gofumpt` using Homebrew, run `brew uninstall gofumpt` and then `brew uninstall go`. Then check `brew install go@1.20`. It should be back to 1.20.x. - -Whenever you edit KubeRay code, run the `gofumpt` linter inside the KubeRay directory: - -```bash -gofumpt -w . -``` - -The `-w` flag will overwrite any unformatted code. +## CI/CD ### Helm chart linter diff --git a/ray-operator/Dockerfile b/ray-operator/Dockerfile index 96548321b68..932e24dde7f 100644 --- a/ray-operator/Dockerfile +++ b/ray-operator/Dockerfile @@ -1,5 +1,5 @@ # Build the manager binary -FROM golang:1.20.10-bullseye as builder +FROM golang:1.22.4-bullseye as builder WORKDIR /workspace # Copy the Go Modules manifests @@ -13,12 +13,14 @@ RUN go mod download COPY main.go main.go COPY apis/ apis/ COPY controllers/ controllers/ +COPY pkg/features pkg/features +COPY pkg/utils pkg/utils # Build USER root RUN CGO_ENABLED=1 GOOS=linux go build -tags strictfipsruntime -a -o manager main.go -FROM gcr.io/distroless/base-debian11:nonroot +FROM gcr.io/distroless/base-debian12:nonroot WORKDIR / COPY --from=builder /workspace/manager . 
USER 65532:65532 diff --git a/ray-operator/Dockerfile.buildx b/ray-operator/Dockerfile.buildx index e6de9115415..9f99489658e 100644 --- a/ray-operator/Dockerfile.buildx +++ b/ray-operator/Dockerfile.buildx @@ -1,4 +1,4 @@ -FROM registry.access.redhat.com/ubi9/ubi-minimal:9.3 +FROM gcr.io/distroless/base-debian12:nonroot ARG TARGETARCH WORKDIR / COPY ./manager-${TARGETARCH} ./manager diff --git a/ray-operator/Makefile b/ray-operator/Makefile index 6697eb029ca..2ca043467d9 100644 --- a/ray-operator/Makefile +++ b/ray-operator/Makefile @@ -72,6 +72,11 @@ test-e2e: WHAT ?= ./test/e2e test-e2e: manifests fmt vet ## Run e2e tests. go test -timeout 30m -v $(WHAT) + +test-sampleyaml: WHAT ?= ./test/sampleyaml +test-sampleyaml: manifests fmt vet + go test -timeout 30m -v $(WHAT) + sync: helm api-docs ./hack/update-codegen.sh @@ -146,7 +151,7 @@ $(LOCALBIN): CONTROLLER_GEN = $(LOCALBIN)/controller-gen $(CONTROLLER_GEN): $(LOCALBIN) controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. - test -s $(CONTROLLER_GEN) || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.13.0 + test -s $(CONTROLLER_GEN) || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.15.0 KUSTOMIZE = $(LOCALBIN)/kustomize $(KUSTOMIZE): $(LOCALBIN) @@ -167,7 +172,7 @@ CRD_REF_DOCS = $(LOCALBIN)/crd-ref-docs $(CRD_REF_DOCS): $(LOCALBIN) .PHONY: crd-ref-docs crd-ref-docs: $(CRD_REF_DOCS) ## Download crd-ref-docs locally if necessary. 
- test -s $(CRD_REF_DOCS) || GOBIN=$(LOCALBIN) go install github.com/elastic/crd-ref-docs@v0.0.10 + test -s $(CRD_REF_DOCS) || GOBIN=$(LOCALBIN) go install github.com/elastic/crd-ref-docs@v0.0.12 .PHONY: clean clean: diff --git a/ray-operator/apis/config/v1alpha1/config_utils.go b/ray-operator/apis/config/v1alpha1/config_utils.go new file mode 100644 index 00000000000..9ac49f94722 --- /dev/null +++ b/ray-operator/apis/config/v1alpha1/config_utils.go @@ -0,0 +1,34 @@ +package v1alpha1 + +import ( + "fmt" + + "github.com/go-logr/logr" + + "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/yunikorn" +) + +func ValidateBatchSchedulerConfig(logger logr.Logger, config Configuration) error { + if config.EnableBatchScheduler && len(config.BatchScheduler) > 0 { + return fmt.Errorf("both feature flags enable-batch-scheduler (deprecated) and batch-scheduler are set. Please use batch-scheduler only") + } + + if config.EnableBatchScheduler { + logger.Info("Feature flag enable-batch-scheduler is deprecated and will not be supported soon. " + + "Use batch-scheduler instead. 
") + return nil + } + + if len(config.BatchScheduler) > 0 { + // if a customized scheduler is configured, check it is supported + if config.BatchScheduler == volcano.GetPluginName() || config.BatchScheduler == yunikorn.GetPluginName() { + logger.Info("Feature flag batch-scheduler is enabled", + "scheduler name", config.BatchScheduler) + } else { + return fmt.Errorf("scheduler is not supported, name=%s", config.BatchScheduler) + } + } + + return nil +} diff --git a/ray-operator/apis/config/v1alpha1/config_utils_test.go b/ray-operator/apis/config/v1alpha1/config_utils_test.go new file mode 100644 index 00000000000..3efeb878687 --- /dev/null +++ b/ray-operator/apis/config/v1alpha1/config_utils_test.go @@ -0,0 +1,105 @@ +package v1alpha1 + +import ( + "testing" + + "github.com/go-logr/logr" + "github.com/go-logr/logr/testr" + + "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/yunikorn" +) + +func TestValidateBatchSchedulerConfig(t *testing.T) { + type args struct { + logger logr.Logger + config Configuration + } + tests := []struct { + name string + args args + wantErr bool + }{ + { + name: "legacy option, enable-batch-scheduler=false", + args: args{ + logger: testr.New(t), + config: Configuration{ + EnableBatchScheduler: false, + }, + }, + wantErr: false, + }, + { + name: "legacy option, enable-batch-scheduler=true", + args: args{ + logger: testr.New(t), + config: Configuration{ + EnableBatchScheduler: true, + }, + }, + wantErr: false, + }, + { + name: "valid option, batch-scheduler=yunikorn", + args: args{ + logger: testr.New(t), + config: Configuration{ + BatchScheduler: yunikorn.GetPluginName(), + }, + }, + wantErr: false, + }, + { + name: "valid option, batch-scheduler=volcano", + args: args{ + logger: testr.New(t), + config: Configuration{ + BatchScheduler: volcano.GetPluginName(), + }, + }, + wantErr: false, + }, + { + name: "invalid option, invalid 
scheduler name", + args: args{ + logger: testr.New(t), + config: Configuration{ + EnableBatchScheduler: false, + BatchScheduler: "unknown-scheduler-name", + }, + }, + wantErr: true, + }, + { + name: "invalid option, invalid scheduler name default", + args: args{ + logger: testr.New(t), + config: Configuration{ + EnableBatchScheduler: false, + BatchScheduler: "default", + }, + }, + wantErr: true, + }, + { + name: "both enable-batch-scheduler and batch-scheduler are set", + args: args{ + logger: testr.New(t), + config: Configuration{ + EnableBatchScheduler: true, + BatchScheduler: "volcano", + }, + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := ValidateBatchSchedulerConfig(tt.args.logger, tt.args.config); (err != nil) != tt.wantErr { + t.Errorf("ValidateBatchSchedulerConfig() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/ray-operator/apis/config/v1alpha1/configuration_types.go b/ray-operator/apis/config/v1alpha1/configuration_types.go index 1998853cb49..92a8f00fff7 100644 --- a/ray-operator/apis/config/v1alpha1/configuration_types.go +++ b/ray-operator/apis/config/v1alpha1/configuration_types.go @@ -3,6 +3,9 @@ package v1alpha1 import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/manager" + + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) //+kubebuilder:object:root=true @@ -25,16 +28,10 @@ type Configuration struct { // resources live. Defaults to the pod namesapce if not set. LeaderElectionNamespace string `json:"leaderElectionNamespace,omitempty"` - // ReconcileConcurrency is the max concurrency for each reconciler. - ReconcileConcurrency int `json:"reconcileConcurrency,omitempty"` - // WatchNamespace specifies a list of namespaces to watch for custom resources, separated by commas. // If empty, all namespaces will be watched. 
WatchNamespace string `json:"watchNamespace,omitempty"` - // ForcedClusterUpgrade enables force upgrading clusters. - ForcedClusterUpgrade bool `json:"forcedClusterUpgrade,omitempty"` - // LogFile is a path to a local file for synchronizing logs. LogFile string `json:"logFile,omitempty"` @@ -46,9 +43,9 @@ type Configuration struct { // Defaults to `json` if empty. LogStdoutEncoder string `json:"logStdoutEncoder,omitempty"` - // EnableBatchScheduler enables the batch scheduler. Currently this is supported - // by Volcano to support gang scheduling. - EnableBatchScheduler bool `json:"enableBatchScheduler,omitempty"` + // BatchScheduler enables the batch scheduler integration with a specific scheduler + // based on the given name, currently, supported values are volcano and yunikorn. + BatchScheduler string `json:"batchScheduler,omitempty"` // HeadSidecarContainers includes specification for a sidecar container // to inject into every Head pod. @@ -57,4 +54,28 @@ type Configuration struct { // WorkerSidecarContainers includes specification for a sidecar container // to inject into every Worker pod. WorkerSidecarContainers []corev1.Container `json:"workerSidecarContainers,omitempty"` + + // ReconcileConcurrency is the max concurrency for each reconciler. + ReconcileConcurrency int `json:"reconcileConcurrency,omitempty"` + + // EnableBatchScheduler enables the batch scheduler. Currently this is supported + // by Volcano to support gang scheduling. + EnableBatchScheduler bool `json:"enableBatchScheduler,omitempty"` + + // UseKubernetesProxy indicates that the services/proxy and pods/proxy subresource should be used + // when connecting to the Ray Head node. This is useful when network policies disallow + // ingress traffic to the Ray cluster from other pods or Kuberay is running in a network without + // connectivity to Pods. 
+ UseKubernetesProxy bool `json:"useKubernetesProxy,omitempty"` + + // DeleteRayJobAfterJobFinishes deletes the RayJob CR itself if shutdownAfterJobFinishes is set to true. + DeleteRayJobAfterJobFinishes bool `json:"deleteRayJobAfterJobFinishes,omitempty"` +} + +func (config Configuration) GetDashboardClient(mgr manager.Manager) func() utils.RayDashboardClientInterface { + return utils.GetRayDashboardClientFunc(mgr, config.UseKubernetesProxy) +} + +func (config Configuration) GetHttpProxyClient(mgr manager.Manager) func() utils.RayHttpProxyClientInterface { + return utils.GetRayHttpProxyClientFunc(mgr, config.UseKubernetesProxy) } diff --git a/ray-operator/apis/config/v1alpha1/defaults.go b/ray-operator/apis/config/v1alpha1/defaults.go index d694e63b7f2..2a0fcef5b81 100644 --- a/ray-operator/apis/config/v1alpha1/defaults.go +++ b/ray-operator/apis/config/v1alpha1/defaults.go @@ -2,7 +2,7 @@ package v1alpha1 import ( "k8s.io/apimachinery/pkg/runtime" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" ) const ( @@ -30,7 +30,7 @@ func SetDefaults_Configuration(cfg *Configuration) { } if cfg.EnableLeaderElection == nil { - cfg.EnableLeaderElection = pointer.Bool(DefaultEnableLeaderElection) + cfg.EnableLeaderElection = ptr.To(DefaultEnableLeaderElection) } if cfg.ReconcileConcurrency == 0 { diff --git a/ray-operator/apis/ray/v1/raycluster_types.go b/ray-operator/apis/ray/v1/raycluster_types.go index f0e1c40ec6a..c3f086eb61c 100644 --- a/ray-operator/apis/ray/v1/raycluster_types.go +++ b/ray-operator/apis/ray/v1/raycluster_types.go @@ -11,22 +11,47 @@ import ( // RayClusterSpec defines the desired state of RayCluster type RayClusterSpec struct { + // Suspend indicates whether a RayCluster should be suspended. + // A suspended RayCluster will have head pods and worker pods deleted. + Suspend *bool `json:"suspend,omitempty"` + // ManagedBy is an optional configuration for the controller or entity that manages a RayCluster. 
+ // The value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'. + // The kuberay-operator reconciles a RayCluster which doesn't have this field at all or + // the field value is the reserved string 'ray.io/kuberay-operator', + // but delegates reconciling the RayCluster with 'kueue.x-k8s.io/multikueue' to the Kueue. + // The field is immutable. + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="the managedBy field is immutable" + // +kubebuilder:validation:XValidation:rule="self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue']",message="the managedBy field value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'" + ManagedBy *string `json:"managedBy,omitempty"` + // AutoscalerOptions specifies optional configuration for the Ray autoscaler. + AutoscalerOptions *AutoscalerOptions `json:"autoscalerOptions,omitempty"` + HeadServiceAnnotations map[string]string `json:"headServiceAnnotations,omitempty"` + // EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs + EnableInTreeAutoscaling *bool `json:"enableInTreeAutoscaling,omitempty"` + // GcsFaultToleranceOptions for enabling GCS FT + GcsFaultToleranceOptions *GcsFaultToleranceOptions `json:"gcsFaultToleranceOptions,omitempty"` // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file // HeadGroupSpecs are the spec for the head pod HeadGroupSpec HeadGroupSpec `json:"headGroupSpec"` - // WorkerGroupSpecs are the specs for the worker pods - WorkerGroupSpecs []WorkerGroupSpec `json:"workerGroupSpecs,omitempty"` // RayVersion is used to determine the command for the Kubernetes Job managed by RayJob RayVersion string `json:"rayVersion,omitempty"` - // EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs - EnableInTreeAutoscaling *bool `json:"enableInTreeAutoscaling,omitempty"` - // AutoscalerOptions 
specifies optional configuration for the Ray autoscaler. - AutoscalerOptions *AutoscalerOptions `json:"autoscalerOptions,omitempty"` - HeadServiceAnnotations map[string]string `json:"headServiceAnnotations,omitempty"` - // Suspend indicates whether a RayCluster should be suspended. - // A suspended RayCluster will have head pods and worker pods deleted. - Suspend *bool `json:"suspend,omitempty"` + // WorkerGroupSpecs are the specs for the worker pods + WorkerGroupSpecs []WorkerGroupSpec `json:"workerGroupSpecs,omitempty"` +} + +// GcsFaultToleranceOptions contains configs for GCS FT +type GcsFaultToleranceOptions struct { + RedisUsername *RedisCredential `json:"redisUsername,omitempty"` + RedisPassword *RedisCredential `json:"redisPassword,omitempty"` + ExternalStorageNamespace string `json:"externalStorageNamespace,omitempty"` + RedisAddress string `json:"redisAddress"` +} + +// RedisCredential is the redis username/password or a reference to the source containing the username/password +type RedisCredential struct { + ValueFrom *corev1.EnvVarSource `json:"valueFrom,omitempty"` + Value string `json:"value,omitempty"` } // HeadGroupSpec are the spec for the head pod @@ -45,6 +70,10 @@ type HeadGroupSpec struct { // WorkerGroupSpec are the specs for the worker pods type WorkerGroupSpec struct { + // Suspend indicates whether a worker group should be suspended. + // A suspended worker group will have all pods deleted. + // This is not a user-facing API and is only used by RayJob DeletionPolicy. + Suspend *bool `json:"suspend,omitempty"` // we can have multiple worker groups, we distinguish them by name GroupName string `json:"groupName"` // Replicas is the number of desired Pods for this worker group. See https://github.com/ray-project/kuberay/pull/1443 for more details about the reason for making this field optional. 
@@ -56,15 +85,18 @@ type WorkerGroupSpec struct { // MaxReplicas denotes the maximum number of desired Pods for this worker group, and the default value is maxInt32. // +kubebuilder:default:=2147483647 MaxReplicas *int32 `json:"maxReplicas"` - // NumOfHosts denotes the number of hosts to create per replica. The default value is 1. - // +kubebuilder:default:=1 - NumOfHosts int32 `json:"numOfHosts,omitempty"` + // IdleTimeoutSeconds denotes the number of seconds to wait before the v2 autoscaler terminates an idle worker pod of this type. + // This value is only used with the Ray Autoscaler enabled and defaults to the value set by the AutoscalingConfig if not specified for this worker group. + IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"` // RayStartParams are the params of the start command: address, object-store-memory, ... RayStartParams map[string]string `json:"rayStartParams"` // Template is a pod template for the worker Template corev1.PodTemplateSpec `json:"template"` // ScaleStrategy defines which pods to remove ScaleStrategy ScaleStrategy `json:"scaleStrategy,omitempty"` + // NumOfHosts denotes the number of hosts to create per replica. The default value is 1. + // +kubebuilder:default:=1 + NumOfHosts int32 `json:"numOfHosts,omitempty"` } // ScaleStrategy to remove workers @@ -82,12 +114,6 @@ type AutoscalerOptions struct { Image *string `json:"image,omitempty"` // ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is for provided for autoscaler testing and development. ImagePullPolicy *corev1.PullPolicy `json:"imagePullPolicy,omitempty"` - // Optional list of environment variables to set in the autoscaler container. - Env []corev1.EnvVar `json:"env,omitempty"` - // Optional list of sources to populate environment variables in the autoscaler container. - EnvFrom []corev1.EnvFromSource `json:"envFrom,omitempty"` - // Optional list of volumeMounts. 
This is needed for enabling TLS for the autoscaler container. - VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"` // SecurityContext defines the security options the container should be run with. // If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. // More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ @@ -101,6 +127,12 @@ type AutoscalerOptions struct { // Aggressive: An alias for Default; upscaling is not rate-limited. // It is not read by the KubeRay operator but by the Ray autoscaler. UpscalingMode *UpscalingMode `json:"upscalingMode,omitempty"` + // Optional list of environment variables to set in the autoscaler container. + Env []corev1.EnvVar `json:"env,omitempty"` + // Optional list of sources to populate environment variables in the autoscaler container. + EnvFrom []corev1.EnvFromSource `json:"envFrom,omitempty"` + // Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container. + VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"` } // +kubebuilder:validation:Enum=Default;Aggressive;Conservative @@ -110,8 +142,8 @@ type UpscalingMode string type ClusterState string const ( - Ready ClusterState = "ready" - Unhealthy ClusterState = "unhealthy" + Ready ClusterState = "ready" + // Failed is deprecated, but we keep it to avoid compilation errors in projects that import the KubeRay Golang module. Failed ClusterState = "failed" Suspended ClusterState = "suspended" ) @@ -121,15 +153,9 @@ type RayClusterStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster // Important: Run "make" to regenerate code after modifying this file // Status reflects the status of the cluster + // + // Deprecated: the State field is replaced by the Conditions field. 
State ClusterState `json:"state,omitempty"` - // AvailableWorkerReplicas indicates how many replicas are available in the cluster - AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"` - // DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level. - DesiredWorkerReplicas int32 `json:"desiredWorkerReplicas,omitempty"` - // MinWorkerReplicas indicates sum of minimum replicas of each node group. - MinWorkerReplicas int32 `json:"minWorkerReplicas,omitempty"` - // MaxWorkerReplicas indicates sum of maximum replicas of each node group. - MaxWorkerReplicas int32 `json:"maxWorkerReplicas,omitempty"` // DesiredCPU indicates total desired CPUs for the cluster DesiredCPU resource.Quantity `json:"desiredCPU,omitempty"` // DesiredMemory indicates total desired memory for the cluster @@ -141,21 +167,69 @@ type RayClusterStatus struct { // LastUpdateTime indicates last update timestamp for this cluster status. // +nullable LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` + // StateTransitionTimes indicates the time of the last state transition for each state. + StateTransitionTimes map[ClusterState]*metav1.Time `json:"stateTransitionTimes,omitempty"` // Service Endpoints Endpoints map[string]string `json:"endpoints,omitempty"` // Head info Head HeadInfo `json:"head,omitempty"` // Reason provides more information about current State Reason string `json:"reason,omitempty"` + + // Represents the latest available observations of a RayCluster's current state. 
+ // +patchMergeKey=type + // +patchStrategy=merge + // +listType=map + // +listMapKey=type + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` + + // ReadyWorkerReplicas indicates how many worker replicas are ready in the cluster + ReadyWorkerReplicas int32 `json:"readyWorkerReplicas,omitempty"` + // AvailableWorkerReplicas indicates how many replicas are available in the cluster + AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"` + // DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level. + DesiredWorkerReplicas int32 `json:"desiredWorkerReplicas,omitempty"` + // MinWorkerReplicas indicates sum of minimum replicas of each node group. + MinWorkerReplicas int32 `json:"minWorkerReplicas,omitempty"` + // MaxWorkerReplicas indicates sum of maximum replicas of each node group. + MaxWorkerReplicas int32 `json:"maxWorkerReplicas,omitempty"` // observedGeneration is the most recent generation observed for this RayCluster. It corresponds to the // RayCluster's generation, which is updated on mutation by the API Server. ObservedGeneration int64 `json:"observedGeneration,omitempty"` } +type RayClusterConditionType string + +// Custom Reason for RayClusterCondition +const ( + AllPodRunningAndReadyFirstTime = "AllPodRunningAndReadyFirstTime" + RayClusterPodsProvisioning = "RayClusterPodsProvisioning" + HeadPodNotFound = "HeadPodNotFound" + HeadPodRunningAndReady = "HeadPodRunningAndReady" + // UnknownReason says that the reason for the condition is unknown. + UnknownReason = "Unknown" +) + +const ( + // RayClusterProvisioned indicates whether all Ray Pods are ready for the first time. + // After RayClusterProvisioned is set to true for the first time, it will not change anymore. + RayClusterProvisioned RayClusterConditionType = "RayClusterProvisioned" + // HeadPodReady indicates whether RayCluster's head Pod is ready for requests. 
+ HeadPodReady RayClusterConditionType = "HeadPodReady" + // RayClusterReplicaFailure is added in a RayCluster when one of its pods fails to be created or deleted. + RayClusterReplicaFailure RayClusterConditionType = "ReplicaFailure" + // RayClusterSuspending is set to true when a user sets .Spec.Suspend to true, ensuring the atomicity of the suspend operation. + RayClusterSuspending RayClusterConditionType = "RayClusterSuspending" + // RayClusterSuspended is set to true when all Pods belonging to a suspending RayCluster are deleted. Note that RayClusterSuspending and RayClusterSuspended cannot both be true at the same time. + RayClusterSuspended RayClusterConditionType = "RayClusterSuspended" +) + // HeadInfo gives info about head type HeadInfo struct { - PodIP string `json:"podIP,omitempty"` - ServiceIP string `json:"serviceIP,omitempty"` + PodIP string `json:"podIP,omitempty"` + ServiceIP string `json:"serviceIP,omitempty"` + PodName string `json:"podName,omitempty"` + ServiceName string `json:"serviceName,omitempty"` } // RayNodeType the type of a ray node: head/worker diff --git a/ray-operator/apis/ray/v1/raycluster_types_test.go b/ray-operator/apis/ray/v1/raycluster_types_test.go index 0feeeaa9fc0..6dce7a77fd6 100644 --- a/ray-operator/apis/ray/v1/raycluster_types_test.go +++ b/ray-operator/apis/ray/v1/raycluster_types_test.go @@ -6,7 +6,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" ) var myRayCluster = &RayCluster{ @@ -55,9 +55,9 @@ var myRayCluster = &RayCluster{ }, WorkerGroupSpecs: []WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), NumOfHosts: 1, GroupName: "small-group", RayStartParams: map[string]string{ diff --git a/ray-operator/apis/ray/v1/raycluster_webhook.go 
b/ray-operator/apis/ray/v1/raycluster_webhook.go index 42c2aa9c418..6650ef9534f 100644 --- a/ray-operator/apis/ray/v1/raycluster_webhook.go +++ b/ray-operator/apis/ray/v1/raycluster_webhook.go @@ -37,7 +37,7 @@ func (r *RayCluster) ValidateCreate() (admission.Warnings, error) { } // ValidateUpdate implements webhook.Validator so a webhook will be registered for the type -func (r *RayCluster) ValidateUpdate(old runtime.Object) (admission.Warnings, error) { +func (r *RayCluster) ValidateUpdate(_ runtime.Object) (admission.Warnings, error) { rayclusterlog.Info("validate update", "name", r.Name) return nil, r.validateRayCluster() } diff --git a/ray-operator/apis/ray/v1/rayjob_types.go b/ray-operator/apis/ray/v1/rayjob_types.go index 4d7a0dec057..ab95ee8993d 100644 --- a/ray-operator/apis/ray/v1/rayjob_types.go +++ b/ray-operator/apis/ray/v1/rayjob_types.go @@ -41,6 +41,8 @@ const ( JobDeploymentStatusFailed JobDeploymentStatus = "Failed" JobDeploymentStatusSuspending JobDeploymentStatus = "Suspending" JobDeploymentStatusSuspended JobDeploymentStatus = "Suspended" + JobDeploymentStatusRetrying JobDeploymentStatus = "Retrying" + JobDeploymentStatusWaiting JobDeploymentStatus = "Waiting" ) // JobFailedReason indicates the reason the RayJob changes its JobDeploymentStatus to 'Failed' @@ -55,55 +57,92 @@ const ( type JobSubmissionMode string const ( - K8sJobMode JobSubmissionMode = "K8sJobMode" // Submit job via Kubernetes Job - HTTPMode JobSubmissionMode = "HTTPMode" // Submit job via HTTP request + K8sJobMode JobSubmissionMode = "K8sJobMode" // Submit job via Kubernetes Job + HTTPMode JobSubmissionMode = "HTTPMode" // Submit job via HTTP request + InteractiveMode JobSubmissionMode = "InteractiveMode" // Don't submit job in KubeRay. Instead, wait for user to submit job and provide the job submission ID. 
) +type DeletionPolicy string + +const ( + DeleteClusterDeletionPolicy DeletionPolicy = "DeleteCluster" // Deletion policy to delete the entire RayCluster custom resource on job completion. + DeleteWorkersDeletionPolicy DeletionPolicy = "DeleteWorkers" // Deletion policy to delete only the workers on job completion. + DeleteSelfDeletionPolicy DeletionPolicy = "DeleteSelf" // Deletion policy to delete the RayJob custom resource (and all associated resources) on job completion. + DeleteNoneDeletionPolicy DeletionPolicy = "DeleteNone" // Deletion policy to delete no resources on job completion. +) + +type SubmitterConfig struct { + // BackoffLimit of the submitter k8s job. + BackoffLimit *int32 `json:"backoffLimit,omitempty"` +} + // RayJobSpec defines the desired state of RayJob type RayJobSpec struct { - // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster - // Important: Run "make" to regenerate code after modifying this file - Entrypoint string `json:"entrypoint"` - // Metadata is data to store along with this job. - Metadata map[string]string `json:"metadata,omitempty"` - // RuntimeEnvYAML represents the runtime environment configuration - // provided as a multi-line YAML string. - RuntimeEnvYAML string `json:"runtimeEnvYAML,omitempty"` - // If jobId is not set, a new jobId will be auto-generated. - JobId string `json:"jobId,omitempty"` - // ShutdownAfterJobFinishes will determine whether to delete the ray cluster once rayJob succeed or failed. - ShutdownAfterJobFinishes bool `json:"shutdownAfterJobFinishes,omitempty"` - // TTLSecondsAfterFinished is the TTL to clean up RayCluster. - // It's only working when ShutdownAfterJobFinishes set to true. - // +kubebuilder:default:=0 - TTLSecondsAfterFinished int32 `json:"ttlSecondsAfterFinished,omitempty"` // ActiveDeadlineSeconds is the duration in seconds that the RayJob may be active before // KubeRay actively tries to terminate the RayJob; value must be positive integer. 
ActiveDeadlineSeconds *int32 `json:"activeDeadlineSeconds,omitempty"` + // Specifies the number of retries before marking this job failed. + // Each retry creates a new RayCluster. + // +kubebuilder:default:=0 + BackoffLimit *int32 `json:"backoffLimit,omitempty"` // RayClusterSpec is the cluster template to run the job RayClusterSpec *RayClusterSpec `json:"rayClusterSpec,omitempty"` + // SubmitterPodTemplate is the template for the pod that will run `ray job submit`. + SubmitterPodTemplate *corev1.PodTemplateSpec `json:"submitterPodTemplate,omitempty"` + // Metadata is data to store along with this job. + Metadata map[string]string `json:"metadata,omitempty"` // clusterSelector is used to select running rayclusters by labels ClusterSelector map[string]string `json:"clusterSelector,omitempty"` + // Configurations of submitter k8s job. + SubmitterConfig *SubmitterConfig `json:"submitterConfig,omitempty"` + // ManagedBy is an optional configuration for the controller or entity that manages a RayJob. + // The value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'. + // The kuberay-operator reconciles a RayJob which doesn't have this field at all or + // the field value is the reserved string 'ray.io/kuberay-operator', + // but delegates reconciling the RayJob with 'kueue.x-k8s.io/multikueue' to the Kueue. + // The field is immutable. + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="the managedBy field is immutable" + // +kubebuilder:validation:XValidation:rule="self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue']",message="the managedBy field value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'" + ManagedBy *string `json:"managedBy,omitempty"` + // DeletionPolicy indicates what resources of the RayJob are deleted upon job completion. + // Valid values are 'DeleteCluster', 'DeleteWorkers', 'DeleteSelf' or 'DeleteNone'. 
+ // If unset, deletion policy is based on 'spec.shutdownAfterJobFinishes'. + // This field requires the RayJobDeletionPolicy feature gate to be enabled. + // +kubebuilder:validation:XValidation:rule="self in ['DeleteCluster', 'DeleteWorkers', 'DeleteSelf', 'DeleteNone']",message="the deletionPolicy field value must be either 'DeleteCluster', 'DeleteWorkers', 'DeleteSelf', or 'DeleteNone'" + DeletionPolicy *DeletionPolicy `json:"deletionPolicy,omitempty"` + // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster + // Important: Run "make" to regenerate code after modifying this file + Entrypoint string `json:"entrypoint,omitempty"` + // RuntimeEnvYAML represents the runtime environment configuration + // provided as a multi-line YAML string. + RuntimeEnvYAML string `json:"runtimeEnvYAML,omitempty"` + // If jobId is not set, a new jobId will be auto-generated. + JobId string `json:"jobId,omitempty"` // SubmissionMode specifies how RayJob submits the Ray job to the RayCluster. // In "K8sJobMode", the KubeRay operator creates a submitter Kubernetes Job to submit the Ray job. // In "HTTPMode", the KubeRay operator sends a request to the RayCluster to create a Ray job. + // In "InteractiveMode", the KubeRay operator waits for a user to submit a job to the Ray cluster. // +kubebuilder:default:=K8sJobMode SubmissionMode JobSubmissionMode `json:"submissionMode,omitempty"` + // EntrypointResources specifies the custom resources and quantities to reserve for the + // entrypoint command. + EntrypointResources string `json:"entrypointResources,omitempty"` + // EntrypointNumCpus specifies the number of cpus to reserve for the entrypoint command. + EntrypointNumCpus float32 `json:"entrypointNumCpus,omitempty"` + // EntrypointNumGpus specifies the number of gpus to reserve for the entrypoint command. + EntrypointNumGpus float32 `json:"entrypointNumGpus,omitempty"` + // TTLSecondsAfterFinished is the TTL to clean up RayCluster. 
+ // It's only working when ShutdownAfterJobFinishes set to true. + // +kubebuilder:default:=0 + TTLSecondsAfterFinished int32 `json:"ttlSecondsAfterFinished,omitempty"` + // ShutdownAfterJobFinishes will determine whether to delete the ray cluster once rayJob succeed or failed. + ShutdownAfterJobFinishes bool `json:"shutdownAfterJobFinishes,omitempty"` // suspend specifies whether the RayJob controller should create a RayCluster instance // If a job is applied with the suspend field set to true, // the RayCluster will not be created and will wait for the transition to false. // If the RayCluster is already created, it will be deleted. // In case of transition to false a new RayCluster will be created. Suspend bool `json:"suspend,omitempty"` - // SubmitterPodTemplate is the template for the pod that will run `ray job submit`. - SubmitterPodTemplate *corev1.PodTemplateSpec `json:"submitterPodTemplate,omitempty"` - // EntrypointNumCpus specifies the number of cpus to reserve for the entrypoint command. - EntrypointNumCpus float32 `json:"entrypointNumCpus,omitempty"` - // EntrypointNumGpus specifies the number of gpus to reserve for the entrypoint command. - EntrypointNumGpus float32 `json:"entrypointNumGpus,omitempty"` - // EntrypointResources specifies the custom resources and quantities to reserve for the - // entrypoint command. - EntrypointResources string `json:"entrypointResources,omitempty"` } // RayJobStatus defines the observed state of RayJob @@ -122,8 +161,16 @@ type RayJobStatus struct { // EndTime is the time when JobDeploymentStatus transitioned to 'Complete' status. // This occurs when the Ray job reaches a terminal state (SUCCEEDED, FAILED, STOPPED) // or the submitter Job has failed. - EndTime *metav1.Time `json:"endTime,omitempty"` + EndTime *metav1.Time `json:"endTime,omitempty"` + // Succeeded is the number of times this job succeeded. 
+ // +kubebuilder:default:=0 + Succeeded *int32 `json:"succeeded,omitempty"` + // Failed is the number of times this job failed. + // +kubebuilder:default:=0 + Failed *int32 `json:"failed,omitempty"` + // RayClusterStatus is the status of the RayCluster running the job. RayClusterStatus RayClusterStatus `json:"rayClusterStatus,omitempty"` + // observedGeneration is the most recent generation observed for this RayJob. It corresponds to the // RayJob's generation, which is updated on mutation by the API Server. ObservedGeneration int64 `json:"observedGeneration,omitempty"` diff --git a/ray-operator/apis/ray/v1/rayjob_types_test.go b/ray-operator/apis/ray/v1/rayjob_types_test.go index 6c498a72a5c..6e599874643 100644 --- a/ray-operator/apis/ray/v1/rayjob_types_test.go +++ b/ray-operator/apis/ray/v1/rayjob_types_test.go @@ -6,10 +6,10 @@ import ( "testing" corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" ) var expectedRayJob = RayJob{ @@ -87,9 +87,9 @@ var expectedRayJob = RayJob{ }, WorkerGroupSpecs: []WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), NumOfHosts: 1, GroupName: "small-group", RayStartParams: map[string]string{ diff --git a/ray-operator/apis/ray/v1/rayservice_types.go b/ray-operator/apis/ray/v1/rayservice_types.go index e93ba805bc1..33371a6f84f 100644 --- a/ray-operator/apis/ray/v1/rayservice_types.go +++ b/ray-operator/apis/ray/v1/rayservice_types.go @@ -11,13 +11,18 @@ import ( type ServiceStatus string const ( - FailedToGetOrCreateRayCluster ServiceStatus = "FailedToGetOrCreateRayCluster" - WaitForServeDeploymentReady ServiceStatus = "WaitForServeDeploymentReady" - FailedToGetServeDeploymentStatus ServiceStatus = "FailedToGetServeDeploymentStatus" - Running ServiceStatus = 
"Running" - Restarting ServiceStatus = "Restarting" - FailedToUpdateServingPodLabel ServiceStatus = "FailedToUpdateServingPodLabel" - FailedToUpdateService ServiceStatus = "FailedToUpdateService" + WaitForServeDeploymentReady ServiceStatus = "WaitForServeDeploymentReady" + Running ServiceStatus = "Running" + PreparingNewCluster ServiceStatus = "PreparingNewCluster" +) + +type RayServiceUpgradeType string + +const ( + // During upgrade, NewCluster strategy will create new upgraded cluster and switch to it when it becomes ready + NewCluster RayServiceUpgradeType = "NewCluster" + // No new cluster will be created while the strategy is set to None + None RayServiceUpgradeType = "None" ) // These statuses should match Ray Serve's application statuses @@ -49,35 +54,45 @@ var DeploymentStatusEnum = struct { UNHEALTHY: "UNHEALTHY", } +type RayServiceUpgradeStrategy struct { + // Type represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None`. + Type *RayServiceUpgradeType `json:"type,omitempty"` +} + // RayServiceSpec defines the desired state of RayService type RayServiceSpec struct { - // Important: Run "make" to regenerate code after modifying this file - // Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. - ServeConfigV2 string `json:"serveConfigV2,omitempty"` - RayClusterSpec RayClusterSpec `json:"rayClusterConfig,omitempty"` // Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 ServiceUnhealthySecondThreshold *int32 `json:"serviceUnhealthySecondThreshold,omitempty"` // Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` // ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. 
ServeService *corev1.Service `json:"serveService,omitempty"` + // UpgradeStrategy defines the scaling policy used when upgrading the RayService. + UpgradeStrategy *RayServiceUpgradeStrategy `json:"upgradeStrategy,omitempty"` + // Important: Run "make" to regenerate code after modifying this file + // Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. + ServeConfigV2 string `json:"serveConfigV2,omitempty"` + RayClusterSpec RayClusterSpec `json:"rayClusterConfig,omitempty"` + // If the field is set to true, the value of the label `ray.io/serve` on the head Pod should always be false. + // Therefore, the head Pod's endpoint will not be added to the Kubernetes Serve service. + ExcludeHeadPodFromServeSvc bool `json:"excludeHeadPodFromServeSvc,omitempty"` } // RayServiceStatuses defines the observed state of RayService type RayServiceStatuses struct { + // LastUpdateTime represents the timestamp when the RayService status was last updated. + LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` + // ServiceStatus indicates the current RayService status. + ServiceStatus ServiceStatus `json:"serviceStatus,omitempty"` ActiveServiceStatus RayServiceStatus `json:"activeServiceStatus,omitempty"` // Pending Service Status indicates a RayCluster will be created or is being created. PendingServiceStatus RayServiceStatus `json:"pendingServiceStatus,omitempty"` - // ServiceStatus indicates the current RayService status. - ServiceStatus ServiceStatus `json:"serviceStatus,omitempty"` // NumServeEndpoints indicates the number of Ray Pods that are actively serving or have been selected by the serve service. // Ray Pods without a proxy actor or those that are unhealthy will not be counted. NumServeEndpoints int32 `json:"numServeEndpoints,omitempty"` // observedGeneration is the most recent generation observed for this RayService. It corresponds to the // RayService's generation, which is updated on mutation by the API Server. 
ObservedGeneration int64 `json:"observedGeneration,omitempty"` - // LastUpdateTime represents the timestamp when the RayService status was last updated. - LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` } type RayServiceStatus struct { @@ -88,23 +103,23 @@ type RayServiceStatus struct { } type AppStatus struct { - Status string `json:"status,omitempty"` - Message string `json:"message,omitempty"` // Keep track of how long the service is healthy. // Update when Serve deployment is healthy or first time convert to unhealthy from healthy. HealthLastUpdateTime *metav1.Time `json:"healthLastUpdateTime,omitempty"` Deployments map[string]ServeDeploymentStatus `json:"serveDeploymentStatuses,omitempty"` + Status string `json:"status,omitempty"` + Message string `json:"message,omitempty"` } // ServeDeploymentStatus defines the current state of a Serve deployment type ServeDeploymentStatus struct { + // Keep track of how long the service is healthy. + // Update when Serve deployment is healthy or first time convert to unhealthy from healthy. + HealthLastUpdateTime *metav1.Time `json:"healthLastUpdateTime,omitempty"` // Name, Status, Message are from Ray Dashboard and represent a Serve deployment's state. // TODO: change status type to enum Status string `json:"status,omitempty"` Message string `json:"message,omitempty"` - // Keep track of how long the service is healthy. - // Update when Serve deployment is healthy or first time convert to unhealthy from healthy. 
- HealthLastUpdateTime *metav1.Time `json:"healthLastUpdateTime,omitempty"` } // +kubebuilder:object:root=true diff --git a/ray-operator/apis/ray/v1/rayservice_types_test.go b/ray-operator/apis/ray/v1/rayservice_types_test.go index 804143851fb..6a73b4642a7 100644 --- a/ray-operator/apis/ray/v1/rayservice_types_test.go +++ b/ray-operator/apis/ray/v1/rayservice_types_test.go @@ -6,10 +6,10 @@ import ( "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/utils/ptr" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" ) var myRayService = &RayService{ @@ -87,9 +87,9 @@ var myRayService = &RayService{ }, WorkerGroupSpecs: []WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), NumOfHosts: 1, GroupName: "small-group", RayStartParams: map[string]string{ diff --git a/ray-operator/apis/ray/v1/webhook_suite_test.go b/ray-operator/apis/ray/v1/webhook_suite_test.go index a0313630a1e..52f4e10f402 100644 --- a/ray-operator/apis/ray/v1/webhook_suite_test.go +++ b/ray-operator/apis/ray/v1/webhook_suite_test.go @@ -110,7 +110,11 @@ var _ = BeforeSuite(func() { dialer := &net.Dialer{Timeout: time.Second} addrPort := fmt.Sprintf("%s:%d", webhookInstallOptions.LocalServingHost, webhookInstallOptions.LocalServingPort) Eventually(func() error { - conn, err := tls.DialWithDialer(dialer, "tcp", addrPort, &tls.Config{InsecureSkipVerify: true}) + conn, err := tls.DialWithDialer(dialer, + "tcp", + addrPort, + &tls.Config{InsecureSkipVerify: true}, //nolint:gosec // Allow InsecureSkipVerify because we are connecting to our own webhook server. 
+ ) if err != nil { return err } diff --git a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go index 75cd1cfeb5c..77a202d4058 100644 --- a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go +++ b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go @@ -6,6 +6,7 @@ package v1 import ( corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -53,6 +54,21 @@ func (in *AutoscalerOptions) DeepCopyInto(out *AutoscalerOptions) { *out = new(corev1.PullPolicy) **out = **in } + if in.SecurityContext != nil { + in, out := &in.SecurityContext, &out.SecurityContext + *out = new(corev1.SecurityContext) + (*in).DeepCopyInto(*out) + } + if in.IdleTimeoutSeconds != nil { + in, out := &in.IdleTimeoutSeconds, &out.IdleTimeoutSeconds + *out = new(int32) + **out = **in + } + if in.UpscalingMode != nil { + in, out := &in.UpscalingMode, &out.UpscalingMode + *out = new(UpscalingMode) + **out = **in + } if in.Env != nil { in, out := &in.Env, &out.Env *out = make([]corev1.EnvVar, len(*in)) @@ -74,21 +90,6 @@ func (in *AutoscalerOptions) DeepCopyInto(out *AutoscalerOptions) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.SecurityContext != nil { - in, out := &in.SecurityContext, &out.SecurityContext - *out = new(corev1.SecurityContext) - (*in).DeepCopyInto(*out) - } - if in.IdleTimeoutSeconds != nil { - in, out := &in.IdleTimeoutSeconds, &out.IdleTimeoutSeconds - *out = new(int32) - **out = **in - } - if in.UpscalingMode != nil { - in, out := &in.UpscalingMode, &out.UpscalingMode - *out = new(UpscalingMode) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoscalerOptions. @@ -101,6 +102,31 @@ func (in *AutoscalerOptions) DeepCopy() *AutoscalerOptions { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *GcsFaultToleranceOptions) DeepCopyInto(out *GcsFaultToleranceOptions) { + *out = *in + if in.RedisUsername != nil { + in, out := &in.RedisUsername, &out.RedisUsername + *out = new(RedisCredential) + (*in).DeepCopyInto(*out) + } + if in.RedisPassword != nil { + in, out := &in.RedisPassword, &out.RedisPassword + *out = new(RedisCredential) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GcsFaultToleranceOptions. +func (in *GcsFaultToleranceOptions) DeepCopy() *GcsFaultToleranceOptions { + if in == nil { + return nil + } + out := new(GcsFaultToleranceOptions) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *HeadGroupSpec) DeepCopyInto(out *HeadGroupSpec) { *out = *in @@ -211,19 +237,16 @@ func (in *RayClusterList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RayClusterSpec) DeepCopyInto(out *RayClusterSpec) { *out = *in - in.HeadGroupSpec.DeepCopyInto(&out.HeadGroupSpec) - if in.WorkerGroupSpecs != nil { - in, out := &in.WorkerGroupSpecs, &out.WorkerGroupSpecs - *out = make([]WorkerGroupSpec, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.EnableInTreeAutoscaling != nil { - in, out := &in.EnableInTreeAutoscaling, &out.EnableInTreeAutoscaling + if in.Suspend != nil { + in, out := &in.Suspend, &out.Suspend *out = new(bool) **out = **in } + if in.ManagedBy != nil { + in, out := &in.ManagedBy, &out.ManagedBy + *out = new(string) + **out = **in + } if in.AutoscalerOptions != nil { in, out := &in.AutoscalerOptions, &out.AutoscalerOptions *out = new(AutoscalerOptions) @@ -236,11 +259,24 @@ func (in *RayClusterSpec) DeepCopyInto(out *RayClusterSpec) { (*out)[key] = val } } - if in.Suspend != nil { - in, out := &in.Suspend, &out.Suspend + if in.EnableInTreeAutoscaling != nil { + in, out := &in.EnableInTreeAutoscaling, &out.EnableInTreeAutoscaling *out = new(bool) **out = **in } + if in.GcsFaultToleranceOptions != nil { + in, out := &in.GcsFaultToleranceOptions, &out.GcsFaultToleranceOptions + *out = new(GcsFaultToleranceOptions) + (*in).DeepCopyInto(*out) + } + in.HeadGroupSpec.DeepCopyInto(&out.HeadGroupSpec) + if in.WorkerGroupSpecs != nil { + in, out := &in.WorkerGroupSpecs, &out.WorkerGroupSpecs + *out = make([]WorkerGroupSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayClusterSpec. 
@@ -264,6 +300,13 @@ func (in *RayClusterStatus) DeepCopyInto(out *RayClusterStatus) { in, out := &in.LastUpdateTime, &out.LastUpdateTime *out = (*in).DeepCopy() } + if in.StateTransitionTimes != nil { + in, out := &in.StateTransitionTimes, &out.StateTransitionTimes + *out = make(map[ClusterState]*metav1.Time, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } if in.Endpoints != nil { in, out := &in.Endpoints, &out.Endpoints *out = make(map[string]string, len(*in)) @@ -272,6 +315,13 @@ func (in *RayClusterStatus) DeepCopyInto(out *RayClusterStatus) { } } out.Head = in.Head + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayClusterStatus. @@ -346,23 +396,33 @@ func (in *RayJobList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RayJobSpec) DeepCopyInto(out *RayJobSpec) { *out = *in - if in.Metadata != nil { - in, out := &in.Metadata, &out.Metadata - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } if in.ActiveDeadlineSeconds != nil { in, out := &in.ActiveDeadlineSeconds, &out.ActiveDeadlineSeconds *out = new(int32) **out = **in } + if in.BackoffLimit != nil { + in, out := &in.BackoffLimit, &out.BackoffLimit + *out = new(int32) + **out = **in + } if in.RayClusterSpec != nil { in, out := &in.RayClusterSpec, &out.RayClusterSpec *out = new(RayClusterSpec) (*in).DeepCopyInto(*out) } + if in.SubmitterPodTemplate != nil { + in, out := &in.SubmitterPodTemplate, &out.SubmitterPodTemplate + *out = new(corev1.PodTemplateSpec) + (*in).DeepCopyInto(*out) + } + if in.Metadata != nil { + in, out := &in.Metadata, &out.Metadata + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } if in.ClusterSelector != nil { in, out := &in.ClusterSelector, &out.ClusterSelector *out = make(map[string]string, len(*in)) @@ -370,11 +430,21 @@ func (in *RayJobSpec) DeepCopyInto(out *RayJobSpec) { (*out)[key] = val } } - if in.SubmitterPodTemplate != nil { - in, out := &in.SubmitterPodTemplate, &out.SubmitterPodTemplate - *out = new(corev1.PodTemplateSpec) + if in.SubmitterConfig != nil { + in, out := &in.SubmitterConfig, &out.SubmitterConfig + *out = new(SubmitterConfig) (*in).DeepCopyInto(*out) } + if in.ManagedBy != nil { + in, out := &in.ManagedBy, &out.ManagedBy + *out = new(string) + **out = **in + } + if in.DeletionPolicy != nil { + in, out := &in.DeletionPolicy, &out.DeletionPolicy + *out = new(DeletionPolicy) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayJobSpec. 
@@ -398,6 +468,16 @@ func (in *RayJobStatus) DeepCopyInto(out *RayJobStatus) { in, out := &in.EndTime, &out.EndTime *out = (*in).DeepCopy() } + if in.Succeeded != nil { + in, out := &in.Succeeded, &out.Succeeded + *out = new(int32) + **out = **in + } + if in.Failed != nil { + in, out := &in.Failed, &out.Failed + *out = new(int32) + **out = **in + } in.RayClusterStatus.DeepCopyInto(&out.RayClusterStatus) } @@ -473,7 +553,6 @@ func (in *RayServiceList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RayServiceSpec) DeepCopyInto(out *RayServiceSpec) { *out = *in - in.RayClusterSpec.DeepCopyInto(&out.RayClusterSpec) if in.ServiceUnhealthySecondThreshold != nil { in, out := &in.ServiceUnhealthySecondThreshold, &out.ServiceUnhealthySecondThreshold *out = new(int32) @@ -489,6 +568,12 @@ func (in *RayServiceSpec) DeepCopyInto(out *RayServiceSpec) { *out = new(corev1.Service) (*in).DeepCopyInto(*out) } + if in.UpgradeStrategy != nil { + in, out := &in.UpgradeStrategy, &out.UpgradeStrategy + *out = new(RayServiceUpgradeStrategy) + (*in).DeepCopyInto(*out) + } + in.RayClusterSpec.DeepCopyInto(&out.RayClusterSpec) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceSpec. @@ -527,12 +612,12 @@ func (in *RayServiceStatus) DeepCopy() *RayServiceStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RayServiceStatuses) DeepCopyInto(out *RayServiceStatuses) { *out = *in - in.ActiveServiceStatus.DeepCopyInto(&out.ActiveServiceStatus) - in.PendingServiceStatus.DeepCopyInto(&out.PendingServiceStatus) if in.LastUpdateTime != nil { in, out := &in.LastUpdateTime, &out.LastUpdateTime *out = (*in).DeepCopy() } + in.ActiveServiceStatus.DeepCopyInto(&out.ActiveServiceStatus) + in.PendingServiceStatus.DeepCopyInto(&out.PendingServiceStatus) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceStatuses. @@ -545,6 +630,46 @@ func (in *RayServiceStatuses) DeepCopy() *RayServiceStatuses { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RayServiceUpgradeStrategy) DeepCopyInto(out *RayServiceUpgradeStrategy) { + *out = *in + if in.Type != nil { + in, out := &in.Type, &out.Type + *out = new(RayServiceUpgradeType) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceUpgradeStrategy. +func (in *RayServiceUpgradeStrategy) DeepCopy() *RayServiceUpgradeStrategy { + if in == nil { + return nil + } + out := new(RayServiceUpgradeStrategy) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RedisCredential) DeepCopyInto(out *RedisCredential) { + *out = *in + if in.ValueFrom != nil { + in, out := &in.ValueFrom, &out.ValueFrom + *out = new(corev1.EnvVarSource) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RedisCredential. +func (in *RedisCredential) DeepCopy() *RedisCredential { + if in == nil { + return nil + } + out := new(RedisCredential) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. func (in *ScaleStrategy) DeepCopyInto(out *ScaleStrategy) { *out = *in @@ -584,9 +709,34 @@ func (in *ServeDeploymentStatus) DeepCopy() *ServeDeploymentStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SubmitterConfig) DeepCopyInto(out *SubmitterConfig) { + *out = *in + if in.BackoffLimit != nil { + in, out := &in.BackoffLimit, &out.BackoffLimit + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SubmitterConfig. +func (in *SubmitterConfig) DeepCopy() *SubmitterConfig { + if in == nil { + return nil + } + out := new(SubmitterConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *WorkerGroupSpec) DeepCopyInto(out *WorkerGroupSpec) { *out = *in + if in.Suspend != nil { + in, out := &in.Suspend, &out.Suspend + *out = new(bool) + **out = **in + } if in.Replicas != nil { in, out := &in.Replicas, &out.Replicas *out = new(int32) @@ -602,6 +752,11 @@ func (in *WorkerGroupSpec) DeepCopyInto(out *WorkerGroupSpec) { *out = new(int32) **out = **in } + if in.IdleTimeoutSeconds != nil { + in, out := &in.IdleTimeoutSeconds, &out.IdleTimeoutSeconds + *out = new(int32) + **out = **in + } if in.RayStartParams != nil { in, out := &in.RayStartParams, &out.RayStartParams *out = make(map[string]string, len(*in)) diff --git a/ray-operator/apis/ray/v1alpha1/raycluster_types.go b/ray-operator/apis/ray/v1alpha1/raycluster_types.go index f885db75e19..3018540c865 100644 --- a/ray-operator/apis/ray/v1alpha1/raycluster_types.go +++ b/ray-operator/apis/ray/v1alpha1/raycluster_types.go @@ -11,22 +11,22 @@ import ( // RayClusterSpec defines the desired state of RayCluster type RayClusterSpec struct { + // EnableInTreeAutoscaling indicates whether operator should create in tree 
autoscaling configs + EnableInTreeAutoscaling *bool `json:"enableInTreeAutoscaling,omitempty"` + // AutoscalerOptions specifies optional configuration for the Ray autoscaler. + AutoscalerOptions *AutoscalerOptions `json:"autoscalerOptions,omitempty"` + // Suspend indicates whether a RayCluster should be suspended. + // A suspended RayCluster will have head pods and worker pods deleted. + Suspend *bool `json:"suspend,omitempty"` + HeadServiceAnnotations map[string]string `json:"headServiceAnnotations,omitempty"` // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file // HeadGroupSpecs are the spec for the head pod HeadGroupSpec HeadGroupSpec `json:"headGroupSpec"` - // WorkerGroupSpecs are the specs for the worker pods - WorkerGroupSpecs []WorkerGroupSpec `json:"workerGroupSpecs,omitempty"` // RayVersion is used to determine the command for the Kubernetes Job managed by RayJob RayVersion string `json:"rayVersion,omitempty"` - // EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs - EnableInTreeAutoscaling *bool `json:"enableInTreeAutoscaling,omitempty"` - // AutoscalerOptions specifies optional configuration for the Ray autoscaler. - AutoscalerOptions *AutoscalerOptions `json:"autoscalerOptions,omitempty"` - HeadServiceAnnotations map[string]string `json:"headServiceAnnotations,omitempty"` - // Suspend indicates whether a RayCluster should be suspended. - // A suspended RayCluster will have head pods and worker pods deleted. - Suspend *bool `json:"suspend,omitempty"` + // WorkerGroupSpecs are the specs for the worker pods + WorkerGroupSpecs []WorkerGroupSpec `json:"workerGroupSpecs,omitempty"` } // HeadGroupSpec are the spec for the head pod @@ -79,12 +79,6 @@ type AutoscalerOptions struct { Image *string `json:"image,omitempty"` // ImagePullPolicy optionally overrides the autoscaler container's image pull policy. 
This override is for provided for autoscaler testing and development. ImagePullPolicy *corev1.PullPolicy `json:"imagePullPolicy,omitempty"` - // Optional list of environment variables to set in the autoscaler container. - Env []corev1.EnvVar `json:"env,omitempty"` - // Optional list of sources to populate environment variables in the autoscaler container. - EnvFrom []corev1.EnvFromSource `json:"envFrom,omitempty"` - // Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container. - VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"` // SecurityContext defines the security options the container should be run with. // If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. // More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ @@ -98,6 +92,12 @@ type AutoscalerOptions struct { // Aggressive: An alias for Default; upscaling is not rate-limited. // It is not read by the KubeRay operator but by the Ray autoscaler. UpscalingMode *UpscalingMode `json:"upscalingMode,omitempty"` + // Optional list of environment variables to set in the autoscaler container. + Env []corev1.EnvVar `json:"env,omitempty"` + // Optional list of sources to populate environment variables in the autoscaler container. + EnvFrom []corev1.EnvFromSource `json:"envFrom,omitempty"` + // Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container. 
+ VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"` } // +kubebuilder:validation:Enum=Default;Aggressive;Conservative @@ -115,18 +115,11 @@ const ( // RayClusterStatus defines the observed state of RayCluster type RayClusterStatus struct { - // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster - // Important: Run "make" to regenerate code after modifying this file - // Status reflects the status of the cluster - State ClusterState `json:"state,omitempty"` - // AvailableWorkerReplicas indicates how many replicas are available in the cluster - AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"` - // DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level. - DesiredWorkerReplicas int32 `json:"desiredWorkerReplicas,omitempty"` - // MinWorkerReplicas indicates sum of minimum replicas of each node group. - MinWorkerReplicas int32 `json:"minWorkerReplicas,omitempty"` - // MaxWorkerReplicas indicates sum of maximum replicas of each node group. - MaxWorkerReplicas int32 `json:"maxWorkerReplicas,omitempty"` + // LastUpdateTime indicates last update timestamp for this cluster status. + // +nullable + LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` + // Service Endpoints + Endpoints map[string]string `json:"endpoints,omitempty"` // DesiredCPU indicates total desired CPUs for the cluster DesiredCPU resource.Quantity `json:"desiredCPU,omitempty"` // DesiredMemory indicates total desired memory for the cluster @@ -135,15 +128,26 @@ type RayClusterStatus struct { DesiredGPU resource.Quantity `json:"desiredGPU,omitempty"` // DesiredTPU indicates total desired TPUs for the cluster DesiredTPU resource.Quantity `json:"desiredTPU,omitempty"` - // LastUpdateTime indicates last update timestamp for this cluster status. 
- // +nullable - LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` - // Service Endpoints - Endpoints map[string]string `json:"endpoints,omitempty"` // Head info Head HeadInfo `json:"head,omitempty"` + // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster + // Important: Run "make" to regenerate code after modifying this file + // Status reflects the status of the cluster + // + // Deprecated: the State field is replaced by the Conditions field. + State ClusterState `json:"state,omitempty"` // Reason provides more information about current State Reason string `json:"reason,omitempty"` + // ReadyWorkerReplicas indicates how many worker replicas are ready in the cluster + ReadyWorkerReplicas int32 `json:"readyWorkerReplicas,omitempty"` + // AvailableWorkerReplicas indicates how many replicas are available in the cluster + AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"` + // DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level. + DesiredWorkerReplicas int32 `json:"desiredWorkerReplicas,omitempty"` + // MinWorkerReplicas indicates sum of minimum replicas of each node group. + MinWorkerReplicas int32 `json:"minWorkerReplicas,omitempty"` + // MaxWorkerReplicas indicates sum of maximum replicas of each node group. + MaxWorkerReplicas int32 `json:"maxWorkerReplicas,omitempty"` // observedGeneration is the most recent generation observed for this RayCluster. It corresponds to the // RayCluster's generation, which is updated on mutation by the API Server. 
ObservedGeneration int64 `json:"observedGeneration,omitempty"` diff --git a/ray-operator/apis/ray/v1alpha1/raycluster_types_test.go b/ray-operator/apis/ray/v1alpha1/raycluster_types_test.go index 0a74621c71e..a544285c798 100644 --- a/ray-operator/apis/ray/v1alpha1/raycluster_types_test.go +++ b/ray-operator/apis/ray/v1alpha1/raycluster_types_test.go @@ -6,7 +6,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" ) var myRayCluster = &RayCluster{ @@ -55,9 +55,9 @@ var myRayCluster = &RayCluster{ }, WorkerGroupSpecs: []WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), GroupName: "small-group", RayStartParams: map[string]string{ "port": "6379", diff --git a/ray-operator/apis/ray/v1alpha1/rayjob_types.go b/ray-operator/apis/ray/v1alpha1/rayjob_types.go index 904c020eadd..7365728de4a 100644 --- a/ray-operator/apis/ray/v1alpha1/rayjob_types.go +++ b/ray-operator/apis/ray/v1alpha1/rayjob_types.go @@ -43,41 +43,41 @@ const ( // RayJobSpec defines the desired state of RayJob type RayJobSpec struct { + // SubmitterPodTemplate is the template for the pod that will run `ray job submit`. + SubmitterPodTemplate *corev1.PodTemplateSpec `json:"submitterPodTemplate,omitempty"` + // Metadata is data to store along with this job. 
+ Metadata map[string]string `json:"metadata,omitempty"` + // RayClusterSpec is the cluster template to run the job + RayClusterSpec *RayClusterSpec `json:"rayClusterSpec,omitempty"` + // ClusterSelector is used to select running rayclusters by labels + ClusterSelector map[string]string `json:"clusterSelector,omitempty"` // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // Important: Run "make" to regenerate code after modifying this file Entrypoint string `json:"entrypoint"` - // Metadata is data to store along with this job. - Metadata map[string]string `json:"metadata,omitempty"` // RuntimeEnvYAML represents the runtime environment configuration // provided as a multi-line YAML string. RuntimeEnvYAML string `json:"runtimeEnvYAML,omitempty"` // If jobId is not set, a new jobId will be auto-generated. JobId string `json:"jobId,omitempty"` - // ShutdownAfterJobFinishes will determine whether to delete the ray cluster once rayJob succeed or failed. - ShutdownAfterJobFinishes bool `json:"shutdownAfterJobFinishes,omitempty"` + // EntrypointResources specifies the custom resources and quantities to reserve for the + // entrypoint command. + EntrypointResources string `json:"entrypointResources,omitempty"` // TTLSecondsAfterFinished is the TTL to clean up RayCluster. // It's only working when ShutdownAfterJobFinishes set to true. // +kubebuilder:default:=0 TTLSecondsAfterFinished int32 `json:"ttlSecondsAfterFinished,omitempty"` - // RayClusterSpec is the cluster template to run the job - RayClusterSpec *RayClusterSpec `json:"rayClusterSpec,omitempty"` - // clusterSelector is used to select running rayclusters by labels - ClusterSelector map[string]string `json:"clusterSelector,omitempty"` - // suspend specifies whether the RayJob controller should create a RayCluster instance + // EntrypointNumCpus specifies the number of cpus to reserve for the entrypoint command. 
+ EntrypointNumCpus float32 `json:"entrypointNumCpus,omitempty"` + // EntrypointNumGpus specifies the number of gpus to reserve for the entrypoint command. + EntrypointNumGpus float32 `json:"entrypointNumGpus,omitempty"` + // ShutdownAfterJobFinishes will determine whether to delete the ray cluster once rayJob succeed or failed. + ShutdownAfterJobFinishes bool `json:"shutdownAfterJobFinishes,omitempty"` + // Suspend specifies whether the RayJob controller should create a RayCluster instance // If a job is applied with the suspend field set to true, // the RayCluster will not be created and will wait for the transition to false. // If the RayCluster is already created, it will be deleted. // In case of transition to false a new RayCluster will be created. Suspend bool `json:"suspend,omitempty"` - // SubmitterPodTemplate is the template for the pod that will run `ray job submit`. - SubmitterPodTemplate *corev1.PodTemplateSpec `json:"submitterPodTemplate,omitempty"` - // EntrypointNumCpus specifies the number of cpus to reserve for the entrypoint command. - EntrypointNumCpus float32 `json:"entrypointNumCpus,omitempty"` - // EntrypointNumGpus specifies the number of gpus to reserve for the entrypoint command. - EntrypointNumGpus float32 `json:"entrypointNumGpus,omitempty"` - // EntrypointResources specifies the custom resources and quantities to reserve for the - // entrypoint command. 
- EntrypointResources string `json:"entrypointResources,omitempty"` } // RayJobStatus defines the observed state of RayJob diff --git a/ray-operator/apis/ray/v1alpha1/rayjob_types_test.go b/ray-operator/apis/ray/v1alpha1/rayjob_types_test.go index 944d49e4798..41cb1db7d1b 100644 --- a/ray-operator/apis/ray/v1alpha1/rayjob_types_test.go +++ b/ray-operator/apis/ray/v1alpha1/rayjob_types_test.go @@ -6,10 +6,10 @@ import ( "testing" corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" ) var expectedRayJob = RayJob{ @@ -87,9 +87,9 @@ var expectedRayJob = RayJob{ }, WorkerGroupSpecs: []WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), GroupName: "small-group", RayStartParams: map[string]string{ "port": "6379", diff --git a/ray-operator/apis/ray/v1alpha1/rayservice_types.go b/ray-operator/apis/ray/v1alpha1/rayservice_types.go index d80d54ef645..cdba20fd985 100644 --- a/ray-operator/apis/ray/v1alpha1/rayservice_types.go +++ b/ray-operator/apis/ray/v1alpha1/rayservice_types.go @@ -11,14 +11,9 @@ import ( type ServiceStatus string const ( - FailedToGetOrCreateRayCluster ServiceStatus = "FailedToGetOrCreateRayCluster" - WaitForServeDeploymentReady ServiceStatus = "WaitForServeDeploymentReady" - FailedToGetServeDeploymentStatus ServiceStatus = "FailedToGetServeDeploymentStatus" - Running ServiceStatus = "Running" - Restarting ServiceStatus = "Restarting" - FailedToUpdateIngress ServiceStatus = "FailedToUpdateIngress" - FailedToUpdateServingPodLabel ServiceStatus = "FailedToUpdateServingPodLabel" - FailedToUpdateService ServiceStatus = "FailedToUpdateService" + WaitForServeDeploymentReady ServiceStatus = "WaitForServeDeploymentReady" + Running ServiceStatus = "Running" + PreparingNewCluster ServiceStatus = 
"PreparingNewCluster" ) // These statuses should match Ray Serve's application statuses @@ -52,31 +47,31 @@ var DeploymentStatusEnum = struct { // RayServiceSpec defines the desired state of RayService type RayServiceSpec struct { - // Important: Run "make" to regenerate code after modifying this file - // Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. - ServeConfigV2 string `json:"serveConfigV2,omitempty"` - RayClusterSpec RayClusterSpec `json:"rayClusterConfig,omitempty"` + // ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. + ServeService *corev1.Service `json:"serveService,omitempty"` // Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 ServiceUnhealthySecondThreshold *int32 `json:"serviceUnhealthySecondThreshold,omitempty"` // Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` - // ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. - ServeService *corev1.Service `json:"serveService,omitempty"` + // Important: Run "make" to regenerate code after modifying this file + // Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. + ServeConfigV2 string `json:"serveConfigV2,omitempty"` + RayClusterSpec RayClusterSpec `json:"rayClusterConfig,omitempty"` } // RayServiceStatuses defines the observed state of RayService // +kubebuilder:printcolumn:name="ServiceStatus",type=string,JSONPath=".status.serviceStatus" type RayServiceStatuses struct { + // LastUpdateTime represents the timestamp when the RayService status was last updated. + LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` + // ServiceStatus indicates the current RayService status. 
+ ServiceStatus ServiceStatus `json:"serviceStatus,omitempty"` ActiveServiceStatus RayServiceStatus `json:"activeServiceStatus,omitempty"` // Pending Service Status indicates a RayCluster will be created or is being created. PendingServiceStatus RayServiceStatus `json:"pendingServiceStatus,omitempty"` - // ServiceStatus indicates the current RayService status. - ServiceStatus ServiceStatus `json:"serviceStatus,omitempty"` // observedGeneration is the most recent generation observed for this RayService. It corresponds to the // RayService's generation, which is updated on mutation by the API Server. ObservedGeneration int64 `json:"observedGeneration,omitempty"` - // LastUpdateTime represents the timestamp when the RayService status was last updated. - LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` } type RayServiceStatus struct { @@ -87,23 +82,23 @@ type RayServiceStatus struct { } type AppStatus struct { - Status string `json:"status,omitempty"` - Message string `json:"message,omitempty"` // Keep track of how long the service is healthy. // Update when Serve deployment is healthy or first time convert to unhealthy from healthy. HealthLastUpdateTime *metav1.Time `json:"healthLastUpdateTime,omitempty"` Deployments map[string]ServeDeploymentStatus `json:"serveDeploymentStatuses,omitempty"` + Status string `json:"status,omitempty"` + Message string `json:"message,omitempty"` } // ServeDeploymentStatus defines the current state of a Serve deployment type ServeDeploymentStatus struct { + // Keep track of how long the service is healthy. + // Update when Serve deployment is healthy or first time convert to unhealthy from healthy. + HealthLastUpdateTime *metav1.Time `json:"healthLastUpdateTime,omitempty"` // Name, Status, Message are from Ray Dashboard and represent a Serve deployment's state. 
// TODO: change status type to enum Status string `json:"status,omitempty"` Message string `json:"message,omitempty"` - // Keep track of how long the service is healthy. - // Update when Serve deployment is healthy or first time convert to unhealthy from healthy. - HealthLastUpdateTime *metav1.Time `json:"healthLastUpdateTime,omitempty"` } // +kubebuilder:object:root=true diff --git a/ray-operator/apis/ray/v1alpha1/rayservice_types_test.go b/ray-operator/apis/ray/v1alpha1/rayservice_types_test.go index 792307261bb..c8f32982077 100644 --- a/ray-operator/apis/ray/v1alpha1/rayservice_types_test.go +++ b/ray-operator/apis/ray/v1alpha1/rayservice_types_test.go @@ -6,10 +6,10 @@ import ( "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/utils/ptr" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" ) var myRayService = &RayService{ @@ -87,9 +87,9 @@ var myRayService = &RayService{ }, WorkerGroupSpecs: []WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), GroupName: "small-group", RayStartParams: map[string]string{ "port": "6379", diff --git a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go index 8fede4b07c8..ef3fb917ac9 100644 --- a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go +++ b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go @@ -53,6 +53,21 @@ func (in *AutoscalerOptions) DeepCopyInto(out *AutoscalerOptions) { *out = new(v1.PullPolicy) **out = **in } + if in.SecurityContext != nil { + in, out := &in.SecurityContext, &out.SecurityContext + *out = new(v1.SecurityContext) + (*in).DeepCopyInto(*out) + } + if in.IdleTimeoutSeconds != nil { + in, out := &in.IdleTimeoutSeconds, &out.IdleTimeoutSeconds + *out = new(int32) + **out = **in + } + 
if in.UpscalingMode != nil { + in, out := &in.UpscalingMode, &out.UpscalingMode + *out = new(UpscalingMode) + **out = **in + } if in.Env != nil { in, out := &in.Env, &out.Env *out = make([]v1.EnvVar, len(*in)) @@ -74,21 +89,6 @@ func (in *AutoscalerOptions) DeepCopyInto(out *AutoscalerOptions) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.SecurityContext != nil { - in, out := &in.SecurityContext, &out.SecurityContext - *out = new(v1.SecurityContext) - (*in).DeepCopyInto(*out) - } - if in.IdleTimeoutSeconds != nil { - in, out := &in.IdleTimeoutSeconds, &out.IdleTimeoutSeconds - *out = new(int32) - **out = **in - } - if in.UpscalingMode != nil { - in, out := &in.UpscalingMode, &out.UpscalingMode - *out = new(UpscalingMode) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoscalerOptions. @@ -211,14 +211,6 @@ func (in *RayClusterList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RayClusterSpec) DeepCopyInto(out *RayClusterSpec) { *out = *in - in.HeadGroupSpec.DeepCopyInto(&out.HeadGroupSpec) - if in.WorkerGroupSpecs != nil { - in, out := &in.WorkerGroupSpecs, &out.WorkerGroupSpecs - *out = make([]WorkerGroupSpec, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } if in.EnableInTreeAutoscaling != nil { in, out := &in.EnableInTreeAutoscaling, &out.EnableInTreeAutoscaling *out = new(bool) @@ -229,6 +221,11 @@ func (in *RayClusterSpec) DeepCopyInto(out *RayClusterSpec) { *out = new(AutoscalerOptions) (*in).DeepCopyInto(*out) } + if in.Suspend != nil { + in, out := &in.Suspend, &out.Suspend + *out = new(bool) + **out = **in + } if in.HeadServiceAnnotations != nil { in, out := &in.HeadServiceAnnotations, &out.HeadServiceAnnotations *out = make(map[string]string, len(*in)) @@ -236,10 +233,13 @@ func (in *RayClusterSpec) DeepCopyInto(out *RayClusterSpec) { (*out)[key] = val } } - if in.Suspend != nil { - in, out := &in.Suspend, &out.Suspend - *out = new(bool) - **out = **in + in.HeadGroupSpec.DeepCopyInto(&out.HeadGroupSpec) + if in.WorkerGroupSpecs != nil { + in, out := &in.WorkerGroupSpecs, &out.WorkerGroupSpecs + *out = make([]WorkerGroupSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } } @@ -256,10 +256,6 @@ func (in *RayClusterSpec) DeepCopy() *RayClusterSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RayClusterStatus) DeepCopyInto(out *RayClusterStatus) { *out = *in - out.DesiredCPU = in.DesiredCPU.DeepCopy() - out.DesiredMemory = in.DesiredMemory.DeepCopy() - out.DesiredGPU = in.DesiredGPU.DeepCopy() - out.DesiredTPU = in.DesiredTPU.DeepCopy() if in.LastUpdateTime != nil { in, out := &in.LastUpdateTime, &out.LastUpdateTime *out = (*in).DeepCopy() @@ -271,6 +267,10 @@ func (in *RayClusterStatus) DeepCopyInto(out *RayClusterStatus) { (*out)[key] = val } } + out.DesiredCPU = in.DesiredCPU.DeepCopy() + out.DesiredMemory = in.DesiredMemory.DeepCopy() + out.DesiredGPU = in.DesiredGPU.DeepCopy() + out.DesiredTPU = in.DesiredTPU.DeepCopy() out.Head = in.Head } @@ -346,6 +346,11 @@ func (in *RayJobList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RayJobSpec) DeepCopyInto(out *RayJobSpec) { *out = *in + if in.SubmitterPodTemplate != nil { + in, out := &in.SubmitterPodTemplate, &out.SubmitterPodTemplate + *out = new(v1.PodTemplateSpec) + (*in).DeepCopyInto(*out) + } if in.Metadata != nil { in, out := &in.Metadata, &out.Metadata *out = make(map[string]string, len(*in)) @@ -365,11 +370,6 @@ func (in *RayJobSpec) DeepCopyInto(out *RayJobSpec) { (*out)[key] = val } } - if in.SubmitterPodTemplate != nil { - in, out := &in.SubmitterPodTemplate, &out.SubmitterPodTemplate - *out = new(v1.PodTemplateSpec) - (*in).DeepCopyInto(*out) - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayJobSpec. @@ -468,7 +468,11 @@ func (in *RayServiceList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RayServiceSpec) DeepCopyInto(out *RayServiceSpec) { *out = *in - in.RayClusterSpec.DeepCopyInto(&out.RayClusterSpec) + if in.ServeService != nil { + in, out := &in.ServeService, &out.ServeService + *out = new(v1.Service) + (*in).DeepCopyInto(*out) + } if in.ServiceUnhealthySecondThreshold != nil { in, out := &in.ServiceUnhealthySecondThreshold, &out.ServiceUnhealthySecondThreshold *out = new(int32) @@ -479,11 +483,7 @@ func (in *RayServiceSpec) DeepCopyInto(out *RayServiceSpec) { *out = new(int32) **out = **in } - if in.ServeService != nil { - in, out := &in.ServeService, &out.ServeService - *out = new(v1.Service) - (*in).DeepCopyInto(*out) - } + in.RayClusterSpec.DeepCopyInto(&out.RayClusterSpec) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceSpec. @@ -522,12 +522,12 @@ func (in *RayServiceStatus) DeepCopy() *RayServiceStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RayServiceStatuses) DeepCopyInto(out *RayServiceStatuses) { *out = *in - in.ActiveServiceStatus.DeepCopyInto(&out.ActiveServiceStatus) - in.PendingServiceStatus.DeepCopyInto(&out.PendingServiceStatus) if in.LastUpdateTime != nil { in, out := &in.LastUpdateTime, &out.LastUpdateTime *out = (*in).DeepCopy() } + in.ActiveServiceStatus.DeepCopyInto(&out.ActiveServiceStatus) + in.PendingServiceStatus.DeepCopyInto(&out.PendingServiceStatus) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceStatuses. 
diff --git a/ray-operator/config/certmanager/kustomization.yaml b/ray-operator/config/certmanager/kustomization.yaml index bebea5a595e..b2ce72a69cd 100644 --- a/ray-operator/config/certmanager/kustomization.yaml +++ b/ray-operator/config/certmanager/kustomization.yaml @@ -3,3 +3,5 @@ resources: configurations: - kustomizeconfig.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization diff --git a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml index 9f037abe88f..bd062f63a56 100644 --- a/ray-operator/config/crd/bases/ray.io_rayclusters.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayclusters.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.13.0 + controller-gen.kubebuilder.io/version: v0.15.0 name: rayclusters.ray.io spec: group: ray.io @@ -78,6 +78,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -116,6 +117,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -134,6 +136,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -144,6 +147,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -193,16 +197,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -267,6 +282,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -279,6 +296,135 @@ spec: type: object enableInTreeAutoscaling: type: boolean + 
gcsFaultToleranceOptions: + properties: + externalStorageNamespace: + type: string + redisAddress: + type: string + redisPassword: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + redisUsername: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + 
default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + required: + - redisAddress + type: object headGroupSpec: properties: enableIngress: @@ -323,6 +469,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -347,6 +494,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -394,6 +542,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -447,6 +597,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -468,6 +620,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -523,11 +676,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -539,11 +694,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -554,6 +711,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -570,11 +728,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -586,14 +746,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - 
nodeSelectorTerms type: object @@ -619,17 +782,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -643,11 +818,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -658,6 +835,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -671,6 +849,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -687,17 +866,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -711,11 +902,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -726,12 +919,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic 
type: object podAntiAffinity: properties: @@ -753,17 +948,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -777,11 +984,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -792,6 +1001,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -805,6 +1015,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -821,17 +1032,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -845,11 +1068,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -860,12 +1085,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + 
x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -877,10 +1104,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -895,6 +1124,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -933,6 +1163,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -945,12 +1176,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -961,6 +1196,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -968,6 +1204,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -982,6 +1219,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -999,6 +1237,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1011,6 +1250,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1032,6 +1279,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1049,6 +1297,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1061,6 +1310,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1083,6 +1340,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ 
-1113,6 +1371,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1187,6 +1446,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1217,6 +1477,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1307,16 +1568,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -1372,6 +1644,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1402,6 +1675,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1464,6 +1738,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -1475,6 +1752,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -1484,18 +1763,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -1505,10 +1791,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -1521,10 +1809,12 @@ spec: items: 
type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -1539,6 +1829,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1577,6 +1868,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1589,12 +1881,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -1605,6 +1901,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -1612,6 +1909,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -1626,6 +1924,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1643,6 +1942,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1655,6 +1955,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1676,6 +1984,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1693,6 +2002,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1705,6 +2015,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1727,6 +2045,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1757,6 +2076,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: 
@@ -1831,6 +2151,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1861,6 +2182,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1951,16 +2273,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2016,6 +2349,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2046,6 +2380,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2110,6 +2445,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2121,6 +2459,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2130,12 +2470,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -2143,10 +2489,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -2161,10 +2513,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: 
map initContainers: items: properties: @@ -2172,10 +2528,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -2190,6 +2548,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2228,6 +2587,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2240,12 +2600,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -2256,6 +2620,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -2263,6 +2628,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -2277,6 +2643,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2294,6 +2661,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2306,6 +2674,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2327,6 +2703,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2344,6 +2721,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2356,6 +2734,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2378,6 +2764,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2408,6 +2795,7 @@ spec: - value type: 
object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2482,6 +2870,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2512,6 +2901,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2602,16 +2992,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2667,6 +3068,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2697,6 +3099,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2759,6 +3162,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2770,6 +3176,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2779,12 +3187,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -2823,6 +3237,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -2862,6 +3277,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: 
integer @@ -2900,6 +3324,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -2912,6 +3337,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -2953,6 +3379,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -2969,11 +3396,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3062,6 +3491,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -3071,6 +3501,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3088,6 +3519,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3116,7 +3548,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3131,6 +3565,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3186,6 +3621,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -3227,6 +3663,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -3256,18 +3693,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -3298,11 +3723,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + 
x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3311,6 +3738,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -3333,10 +3762,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -3353,6 +3784,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3433,11 +3865,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3499,6 +3933,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -3516,7 +3989,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3562,6 +4037,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -3580,7 +4056,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3600,6 +4078,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic 
type: object quobyte: properties: @@ -3631,6 +4110,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -3638,6 +4118,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3660,6 +4141,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3698,6 +4180,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -3712,6 +4195,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3737,6 +4221,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -3749,6 +4236,14 @@ spec: additionalProperties: type: string type: object + managedBy: + type: string + x-kubernetes-validations: + - message: the managedBy field is immutable + rule: self == oldSelf + - message: the managedBy field value must be either 'ray.io/kuberay-operator' + or 'kueue.x-k8s.io/multikueue' + rule: self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue'] rayVersion: type: string suspend: @@ -3758,6 +4253,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 @@ -3785,6 +4283,8 @@ spec: type: string type: array type: object + suspend: + type: boolean template: properties: metadata: @@ -3831,11 +4331,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3847,11 +4349,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -3862,6 
+4366,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -3878,11 +4383,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3894,14 +4401,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -3927,17 +4437,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -3951,11 +4473,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3966,6 +4490,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -3979,6 +4504,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -3995,17 +4521,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: 
atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4019,11 +4557,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4034,12 +4574,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -4061,17 +4603,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4085,11 +4639,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4100,6 +4656,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -4113,6 +4670,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -4129,17 +4687,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object 
x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4153,11 +4723,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4168,12 +4740,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -4185,10 +4759,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4203,6 +4779,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4241,6 +4818,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4253,12 +4831,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4269,6 +4851,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4276,6 +4859,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4290,6 +4874,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4307,6 +4892,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4319,6 +4905,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: 
int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4340,6 +4934,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4357,6 +4952,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4369,6 +4965,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4391,6 +4995,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4421,6 +5026,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4495,6 +5101,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4525,6 +5132,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4615,16 +5223,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -4680,6 +5299,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4710,6 +5330,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4772,6 +5393,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -4783,6 +5407,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: 
string subPathExpr: @@ -4792,18 +5418,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -4813,10 +5446,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -4829,10 +5464,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4847,6 +5484,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4885,6 +5523,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4897,12 +5536,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4913,6 +5556,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4920,6 +5564,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4934,6 +5579,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4951,6 +5597,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4963,6 +5610,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4984,6 +5639,7 @@ 
spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5001,6 +5657,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5011,7 +5668,15 @@ spec: scheme: type: string required: - - port + - port + type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds type: object tcpSocket: properties: @@ -5035,6 +5700,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5065,6 +5731,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5139,6 +5806,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5169,6 +5837,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5259,16 +5928,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5324,6 +6004,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5354,6 +6035,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5418,6 +6100,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -5429,6 +6114,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -5438,12 +6125,18 @@ spec: - name type: object type: array + 
x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -5451,10 +6144,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -5469,10 +6168,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -5480,10 +6183,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -5498,6 +6203,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5536,6 +6242,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5548,12 +6255,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -5564,6 +6275,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -5571,6 +6283,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -5585,6 +6298,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5602,6 +6316,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5614,6 +6329,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + 
type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5635,6 +6358,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5652,6 +6376,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5664,6 +6389,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5686,6 +6419,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5716,6 +6450,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5790,6 +6525,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5820,6 +6556,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5910,16 +6647,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5975,6 +6723,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -6005,6 +6754,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -6067,6 +6817,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -6078,6 +6831,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string 
subPathExpr: @@ -6087,12 +6842,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -6131,6 +6892,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -6170,6 +6932,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -6208,6 +6979,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -6220,6 +6992,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -6261,6 +7034,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -6277,11 +7051,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6370,6 +7146,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -6379,6 +7156,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6396,6 +7174,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6424,7 +7203,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6439,6 +7220,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object 
x-kubernetes-map-type: atomic @@ -6494,6 +7276,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -6535,6 +7318,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -6564,18 +7348,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -6606,11 +7378,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6619,6 +7393,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -6641,10 +7417,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -6661,6 +7439,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6741,11 +7520,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6807,6 +7588,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + 
x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -6824,7 +7644,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6870,6 +7692,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -6888,7 +7711,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6908,6 +7733,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -6939,6 +7765,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -6946,6 +7773,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6968,6 +7796,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7006,6 +7835,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -7020,6 +7850,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7045,6 +7876,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -7065,6 +7899,45 @@ spec: availableWorkerReplicas: format: int32 type: integer + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + maxLength: 32768 + type: string + observedGeneration: + format: int64 + minimum: 0 + type: integer + reason: + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + enum: + - "True" + - "False" + - 
Unknown + type: string + type: + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map desiredCPU: anyOf: - type: integer @@ -7100,8 +7973,12 @@ spec: properties: podIP: type: string + podName: + type: string serviceIP: type: string + serviceName: + type: string type: object lastUpdateTime: format: date-time @@ -7116,10 +7993,18 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: type: string + stateTransitionTimes: + additionalProperties: + format: date-time + type: string + type: object type: object type: object served: true @@ -7188,6 +8073,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7226,6 +8112,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7244,6 +8131,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -7254,6 +8142,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -7303,16 +8192,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -7377,6 +8277,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -7433,6 +8335,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string 
externalTrafficPolicy: @@ -7457,6 +8360,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -7504,6 +8408,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -7557,6 +8463,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -7578,6 +8486,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -7633,11 +8542,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7649,11 +8560,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -7664,6 +8577,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -7680,11 +8594,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7696,14 +8612,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -7729,17 +8648,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + 
type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7753,11 +8684,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7768,6 +8701,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -7781,6 +8715,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -7797,17 +8732,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7821,11 +8768,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7836,12 +8785,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -7863,17 +8814,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + 
matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7887,11 +8850,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7902,6 +8867,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -7915,6 +8881,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -7931,17 +8898,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7955,11 +8934,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7970,12 +8951,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -7987,10 +8970,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8005,6 +8990,7 @@ spec: key: type: string name: + 
default: "" type: string optional: type: boolean @@ -8043,6 +9029,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8055,12 +9042,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8071,6 +9062,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8078,6 +9070,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8092,6 +9085,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8109,6 +9103,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8121,6 +9116,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8142,6 +9145,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8159,6 +9163,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8171,6 +9176,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8193,6 +9206,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8223,6 +9237,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8297,6 +9312,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8327,6 +9343,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -8417,16 +9434,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -8482,6 +9510,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8512,6 +9541,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8574,6 +9604,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -8585,6 +9618,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -8594,18 +9629,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -8615,10 +9657,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -8631,10 +9675,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8649,6 +9695,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8687,6 +9734,7 @@ spec: key: type: string name: + 
default: "" type: string optional: type: boolean @@ -8699,12 +9747,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8715,6 +9767,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8722,6 +9775,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8736,6 +9790,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8753,6 +9808,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8765,6 +9821,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8786,6 +9850,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8803,6 +9868,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8815,6 +9881,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8837,6 +9911,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8867,6 +9942,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8941,6 +10017,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8971,6 +10048,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9061,16 +10139,27 @@ spec: properties: 
allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -9126,6 +10215,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9156,6 +10246,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9220,6 +10311,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -9231,6 +10325,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -9240,12 +10336,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -9253,10 +10355,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -9271,10 +10379,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -9282,10 +10394,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -9300,6 +10414,7 @@ spec: key: type: string name: + 
default: "" type: string optional: type: boolean @@ -9338,6 +10453,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -9350,12 +10466,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -9366,6 +10486,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -9373,6 +10494,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -9387,6 +10509,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9404,6 +10527,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9416,6 +10540,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9437,6 +10569,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9454,6 +10587,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9466,6 +10600,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9488,6 +10630,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9518,6 +10661,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9592,6 +10736,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9622,6 +10767,7 @@ spec: - value type: object type: 
array + x-kubernetes-list-type: atomic path: type: string port: @@ -9712,16 +10858,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -9777,6 +10934,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9807,6 +10965,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9869,6 +11028,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -9880,6 +11042,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -9889,12 +11053,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -9933,6 +11103,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -9972,6 +11143,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -10010,6 +11190,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -10022,6 +11203,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: 
gmsaCredentialSpec: @@ -10063,6 +11245,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -10079,11 +11262,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -10172,6 +11357,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -10181,6 +11367,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10198,6 +11385,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10226,7 +11414,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10241,6 +11431,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10296,6 +11487,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -10337,6 +11529,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -10366,18 +11559,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -10408,11 +11589,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -10421,6 +11604,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string 
volumeName: @@ -10443,10 +11628,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -10463,6 +11650,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10543,11 +11731,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10609,6 +11799,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -10626,7 +11855,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10672,6 +11903,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -10690,7 +11922,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10710,6 +11944,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -10741,6 +11976,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -10748,6 +11984,7 @@ spec: secretRef: properties: name: + 
default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10770,6 +12007,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10808,6 +12046,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -10822,6 +12061,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10847,6 +12087,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -10937,11 +12180,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -10953,11 +12198,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -10968,6 +12215,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -10984,11 +12232,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -11000,14 +12250,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -11033,17 +12286,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: 
string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11057,11 +12322,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11072,6 +12339,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11085,6 +12353,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11101,17 +12370,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11125,11 +12406,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11140,12 +12423,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -11167,17 +12452,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic 
matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11191,11 +12488,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11206,6 +12505,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11219,6 +12519,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11235,17 +12536,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11259,11 +12572,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11274,12 +12589,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -11291,10 +12608,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: 
array + x-kubernetes-list-type: atomic env: items: properties: @@ -11309,6 +12628,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11347,6 +12667,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11359,12 +12680,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -11375,6 +12700,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -11382,6 +12708,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -11396,6 +12723,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11413,6 +12741,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11425,6 +12754,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11446,6 +12783,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11463,6 +12801,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11475,6 +12814,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11497,6 +12844,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11527,6 +12875,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11601,6 +12950,7 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11631,6 +12981,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11721,16 +13072,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -11786,6 +13148,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11816,6 +13179,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11878,6 +13242,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -11889,6 +13256,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -11898,18 +13267,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -11919,10 +13295,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -11935,10 +13313,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ 
-11953,6 +13333,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11991,6 +13372,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12003,12 +13385,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12019,6 +13405,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12026,6 +13413,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12040,6 +13428,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12057,6 +13446,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12069,6 +13459,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12090,6 +13488,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12107,6 +13506,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12119,6 +13519,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12141,6 +13549,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12171,6 +13580,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12245,6 +13655,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: 
format: int32 @@ -12275,6 +13686,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12365,16 +13777,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -12430,6 +13853,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12460,6 +13884,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12524,6 +13949,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -12535,6 +13963,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -12544,12 +13974,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -12557,10 +13993,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -12575,10 +14017,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -12586,10 +14032,12 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -12604,6 +14052,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12642,6 +14091,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12654,12 +14104,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12670,6 +14124,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12677,6 +14132,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12691,6 +14147,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12708,6 +14165,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12720,6 +14178,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12741,6 +14207,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12758,6 +14225,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12770,6 +14238,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12792,6 +14268,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12822,6 +14299,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: 
@@ -12896,6 +14374,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12926,6 +14405,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13016,16 +14496,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -13081,6 +14572,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13111,6 +14603,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13173,6 +14666,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -13184,6 +14680,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -13193,12 +14691,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -13237,6 +14741,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -13276,6 +14781,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -13314,6 +14828,7 @@ spec: format: int64 type: 
integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -13326,6 +14841,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -13367,6 +14883,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -13383,11 +14900,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -13476,6 +14995,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -13485,6 +15005,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13502,6 +15023,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13530,7 +15052,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13545,6 +15069,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13600,6 +15125,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -13641,6 +15167,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -13670,18 +15197,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -13712,11 +15227,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic 
matchLabels: additionalProperties: type: string @@ -13725,6 +15242,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -13747,10 +15266,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -13767,6 +15288,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13847,11 +15369,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13913,6 +15437,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -13930,7 +15493,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13976,6 +15541,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -13994,7 +15560,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -14014,6 +15582,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object 
quobyte: properties: @@ -14045,6 +15614,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -14052,6 +15622,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14074,6 +15645,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14112,6 +15684,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -14126,6 +15699,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14151,6 +15725,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -14222,6 +15799,9 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: diff --git a/ray-operator/config/crd/bases/ray.io_rayjobs.yaml b/ray-operator/config/crd/bases/ray.io_rayjobs.yaml index 6fc5f98e204..580b50c7c5b 100644 --- a/ray-operator/config/crd/bases/ray.io_rayjobs.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayjobs.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.13.0 + controller-gen.kubebuilder.io/version: v0.15.0 name: rayjobs.ray.io spec: group: ray.io @@ -50,10 +50,20 @@ spec: activeDeadlineSeconds: format: int32 type: integer + backoffLimit: + default: 0 + format: int32 + type: integer clusterSelector: additionalProperties: type: string type: object + deletionPolicy: + type: string + x-kubernetes-validations: + - message: the deletionPolicy field value must be either 'DeleteCluster', + 'DeleteWorkers', 'DeleteSelf', or 'DeleteNone' + rule: self in ['DeleteCluster', 'DeleteWorkers', 'DeleteSelf', 'DeleteNone'] entrypoint: type: string 
entrypointNumCpus: @@ -64,6 +74,14 @@ spec: type: string jobId: type: string + managedBy: + type: string + x-kubernetes-validations: + - message: the managedBy field is immutable + rule: self == oldSelf + - message: the managedBy field value must be either 'ray.io/kuberay-operator' + or 'kueue.x-k8s.io/multikueue' + rule: self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue'] metadata: additionalProperties: type: string @@ -86,6 +104,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -124,6 +143,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -142,6 +162,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -152,6 +173,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -201,16 +223,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -275,6 +308,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -287,6 +322,135 @@ spec: type: object enableInTreeAutoscaling: type: boolean + gcsFaultToleranceOptions: + properties: + externalStorageNamespace: + type: string + redisAddress: + type: string + redisPassword: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + 
x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + redisUsername: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + required: + - redisAddress + type: object headGroupSpec: properties: enableIngress: @@ -331,6 +495,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -355,6 +520,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -402,6 
+568,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -455,6 +623,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -476,6 +646,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -531,11 +702,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -547,11 +720,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -562,6 +737,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -578,11 +754,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -594,14 +772,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -627,17 +808,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: 
properties: matchExpressions: @@ -651,11 +844,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -666,6 +861,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -679,6 +875,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -695,17 +892,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -719,11 +928,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -734,12 +945,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -761,17 +974,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic 
namespaceSelector: properties: matchExpressions: @@ -785,11 +1010,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -800,6 +1027,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -813,6 +1041,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -829,17 +1058,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -853,11 +1094,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -868,12 +1111,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -885,10 +1130,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -903,6 +1150,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -941,6 +1189,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -953,12 +1202,16 @@ spec: - name 
type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -969,6 +1222,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -976,6 +1230,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -990,6 +1245,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1007,6 +1263,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1019,6 +1276,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1040,6 +1305,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1057,6 +1323,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1069,6 +1336,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1091,6 +1366,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1121,6 +1397,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1195,6 +1472,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1225,6 +1503,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1315,16 +1594,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: 
string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -1380,6 +1670,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1410,6 +1701,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1472,6 +1764,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -1483,6 +1778,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -1492,18 +1789,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -1513,10 +1817,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -1529,10 +1835,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -1547,6 +1855,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1585,6 +1894,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1597,12 +1907,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: 
properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -1613,6 +1927,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -1620,6 +1935,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -1634,6 +1950,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1651,6 +1968,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1663,6 +1981,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1684,6 +2010,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1701,6 +2028,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1713,6 +2041,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1735,6 +2071,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1765,6 +2102,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1839,6 +2177,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1869,6 +2208,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1959,16 +2299,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: 
type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2024,6 +2375,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2054,6 +2406,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2118,6 +2471,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2129,6 +2485,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2138,12 +2496,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -2151,10 +2515,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -2169,10 +2539,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -2180,10 +2554,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -2198,6 +2574,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2236,6 +2613,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2248,12 +2626,16 @@ spec: - name type: object type: array + 
x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -2264,6 +2646,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -2271,6 +2654,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -2285,6 +2669,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2302,6 +2687,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2314,6 +2700,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2335,6 +2729,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2352,6 +2747,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2364,6 +2760,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2386,6 +2790,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2416,6 +2821,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2490,6 +2896,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2520,6 +2927,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2610,16 +3018,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: 
string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2675,6 +3094,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2705,6 +3125,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2767,6 +3188,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2778,6 +3202,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2787,12 +3213,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -2831,6 +3263,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -2870,6 +3303,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -2908,6 +3350,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -2920,6 +3363,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -2961,6 +3405,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -2977,11 +3422,13 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3070,6 +3517,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -3079,6 +3527,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3096,6 +3545,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3124,7 +3574,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3139,6 +3591,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3194,6 +3647,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -3235,6 +3689,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -3264,18 +3719,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -3306,11 +3749,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3319,6 +3764,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -3341,10 +3788,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -3361,6 +3810,7 @@ spec: 
secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3441,11 +3891,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3507,6 +3959,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -3524,7 +4015,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3570,6 +4063,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -3588,7 +4082,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3608,6 +4104,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -3639,6 +4136,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -3646,6 +4144,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3668,6 +4167,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3706,6 +4206,7 @@ spec: - path type: object type: array + 
x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -3720,6 +4221,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3745,6 +4247,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -3757,6 +4262,14 @@ spec: additionalProperties: type: string type: object + managedBy: + type: string + x-kubernetes-validations: + - message: the managedBy field is immutable + rule: self == oldSelf + - message: the managedBy field value must be either 'ray.io/kuberay-operator' + or 'kueue.x-k8s.io/multikueue' + rule: self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue'] rayVersion: type: string suspend: @@ -3766,6 +4279,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 @@ -3793,6 +4309,8 @@ spec: type: string type: array type: object + suspend: + type: boolean template: properties: metadata: @@ -3839,11 +4357,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3855,11 +4375,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -3870,6 +4392,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -3886,11 +4409,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3902,14 +4427,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic 
required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -3935,17 +4463,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -3959,11 +4499,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3974,6 +4516,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -3987,6 +4530,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -4003,17 +4547,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4027,11 +4583,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ 
-4042,12 +4600,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -4069,17 +4629,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4093,11 +4665,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4108,6 +4682,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -4121,6 +4696,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -4137,17 +4713,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4161,11 +4749,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: 
additionalProperties: type: string @@ -4176,12 +4766,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -4193,10 +4785,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4211,6 +4805,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4249,6 +4844,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4261,12 +4857,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4277,6 +4877,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4284,6 +4885,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4298,6 +4900,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4315,6 +4918,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4327,6 +4931,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4348,6 +4960,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4365,6 +4978,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4377,6 +4991,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: 
integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4399,6 +5021,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4429,6 +5052,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4503,6 +5127,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4533,6 +5158,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4623,16 +5249,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -4688,6 +5325,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4718,6 +5356,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4780,6 +5419,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -4791,6 +5433,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -4800,18 +5444,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -4821,10 +5472,12 @@ spec: 
type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -4837,10 +5490,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4855,6 +5510,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4893,6 +5549,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4905,12 +5562,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4921,6 +5582,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4928,6 +5590,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4942,6 +5605,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4959,6 +5623,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4971,6 +5636,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4992,6 +5665,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5009,6 +5683,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5021,6 +5696,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5043,6 +5726,7 @@ spec: items: 
type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5073,6 +5757,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5147,6 +5832,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5177,6 +5863,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5267,16 +5954,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5332,6 +6030,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5362,6 +6061,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5426,6 +6126,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -5437,6 +6140,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -5446,12 +6151,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -5459,10 +6170,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: 
boolean hostNetwork: @@ -5477,10 +6194,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -5488,10 +6209,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -5506,6 +6229,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5544,6 +6268,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5556,12 +6281,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -5572,6 +6301,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -5579,6 +6309,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -5593,6 +6324,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5610,6 +6342,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5622,6 +6355,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5643,6 +6384,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5660,6 +6402,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5672,6 +6415,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + 
type: object tcpSocket: properties: host: @@ -5694,6 +6445,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5724,6 +6476,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5798,6 +6551,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5828,6 +6582,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5918,16 +6673,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5983,6 +6749,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -6013,6 +6780,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -6075,6 +6843,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -6086,6 +6857,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -6095,12 +6868,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -6139,6 +6918,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -6178,6 +6958,15 
@@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -6216,6 +7005,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -6228,6 +7018,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -6269,6 +7060,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -6285,11 +7077,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6378,6 +7172,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -6387,6 +7182,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6404,6 +7200,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6432,7 +7229,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6447,6 +7246,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6502,6 +7302,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -6543,6 +7344,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -6572,18 +7374,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - 
name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -6614,11 +7404,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6627,6 +7419,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -6649,10 +7443,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -6669,6 +7465,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6749,11 +7546,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6815,6 +7614,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -6832,7 +7670,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6878,6 +7718,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -6896,7 +7737,9 @@ spec: 
- path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6916,6 +7759,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -6947,6 +7791,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -6954,6 +7799,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6976,6 +7822,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7014,6 +7861,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -7028,6 +7876,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7053,6 +7902,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -7075,6 +7927,12 @@ spec: submissionMode: default: K8sJobMode type: string + submitterConfig: + properties: + backoffLimit: + format: int32 + type: integer + type: object submitterPodTemplate: properties: metadata: @@ -7121,11 +7979,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7137,11 +7997,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -7152,6 +8014,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -7168,11 +8031,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - 
operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7184,14 +8049,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -7217,17 +8085,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7241,11 +8121,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7256,6 +8138,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -7269,6 +8152,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -7285,17 +8169,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7309,11 +8205,13 @@ spec: items: type: 
string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7324,12 +8222,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -7351,17 +8251,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7375,11 +8287,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7390,6 +8304,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -7403,6 +8318,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -7419,17 +8335,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -7443,11 +8371,13 
@@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -7458,12 +8388,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -7475,10 +8407,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -7493,6 +8427,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7531,6 +8466,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7543,12 +8479,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -7559,6 +8499,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -7566,6 +8507,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -7580,6 +8522,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -7597,6 +8540,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -7609,6 +8553,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -7630,6 +8582,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -7647,6 +8600,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -7659,6 +8613,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -7681,6 +8643,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -7711,6 +8674,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -7785,6 +8749,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -7815,6 +8780,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -7905,16 +8871,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -7970,6 +8947,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8000,6 +8978,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8062,6 +9041,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -8073,6 +9055,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -8082,18 +9066,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + 
x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -8103,10 +9094,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -8119,10 +9112,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8137,6 +9132,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8175,6 +9171,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8187,12 +9184,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8203,6 +9204,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8210,6 +9212,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8224,6 +9227,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8241,6 +9245,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8253,6 +9258,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8274,6 +9287,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8291,6 +9305,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8303,6 +9318,14 @@ spec: required: - 
port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8325,6 +9348,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8355,6 +9379,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8429,6 +9454,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8459,6 +9485,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8549,16 +9576,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -8614,6 +9652,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8644,6 +9683,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8708,6 +9748,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -8719,6 +9762,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -8728,12 +9773,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -8741,10 +9792,16 @@ spec: items: type: string 
type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -8759,10 +9816,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -8770,10 +9831,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8788,6 +9851,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8826,6 +9890,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8838,12 +9903,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8854,6 +9923,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8861,6 +9931,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8875,6 +9946,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8892,6 +9964,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8904,6 +9977,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8925,6 +10006,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8942,6 +10024,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -8954,6 +10037,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8976,6 +10067,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9006,6 +10098,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9080,6 +10173,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9110,6 +10204,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9200,16 +10295,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -9265,6 +10371,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9295,6 +10402,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9357,6 +10465,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -9368,6 +10479,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -9377,12 +10490,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + 
x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -9421,6 +10540,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -9460,6 +10580,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -9498,6 +10627,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -9510,6 +10640,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -9551,6 +10682,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -9567,11 +10699,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -9660,6 +10794,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -9669,6 +10804,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -9686,6 +10822,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -9714,7 +10851,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -9729,6 +10868,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -9784,6 +10924,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -9825,6 +10966,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic 
dataSource: properties: apiGroup: @@ -9854,18 +10996,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -9896,11 +11026,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -9909,6 +11041,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -9931,10 +11065,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -9951,6 +11087,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10031,11 +11168,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10097,6 +11236,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -10114,7 +11292,9 @@ spec: - path type: object type: 
array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10160,6 +11340,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -10178,7 +11359,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10198,6 +11381,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -10229,6 +11413,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -10236,6 +11421,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10258,6 +11444,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10296,6 +11483,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -10310,6 +11498,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10335,6 +11524,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -10345,8 +11537,6 @@ spec: default: 0 format: int32 type: integer - required: - - entrypoint type: object status: properties: @@ -10355,6 +11545,10 @@ spec: endTime: format: date-time type: string + failed: + default: 0 + format: int32 + type: integer jobDeploymentStatus: type: string jobId: @@ -10373,6 +11567,45 @@ spec: availableWorkerReplicas: format: int32 type: integer + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + maxLength: 32768 + type: string + observedGeneration: + format: int64 + minimum: 0 + type: integer + reason: + maxLength: 1024 + minLength: 1 + pattern: 
^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + enum: + - "True" + - "False" + - Unknown + type: string + type: + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map desiredCPU: anyOf: - type: integer @@ -10408,8 +11641,12 @@ spec: properties: podIP: type: string + podName: + type: string serviceIP: type: string + serviceName: + type: string type: object lastUpdateTime: format: date-time @@ -10424,16 +11661,28 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: type: string + stateTransitionTimes: + additionalProperties: + format: date-time + type: string + type: object type: object reason: type: string startTime: format: date-time type: string + succeeded: + default: 0 + format: int32 + type: integer type: object type: object served: true @@ -10488,6 +11737,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -10526,6 +11776,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -10544,6 +11795,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -10554,6 +11806,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -10603,16 +11856,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -10677,6 
+11941,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -10733,6 +11999,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -10757,6 +12024,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -10804,6 +12072,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -10857,6 +12127,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -10878,6 +12150,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -10933,11 +12206,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -10949,11 +12224,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -10964,6 +12241,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -10980,11 +12258,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -10996,14 +12276,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -11029,17 +12312,29 @@ spec: items: type: 
string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11053,11 +12348,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11068,6 +12365,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11081,6 +12379,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11097,17 +12396,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11121,11 +12432,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11136,12 +12449,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ 
-11163,17 +12478,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11187,11 +12514,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11202,6 +12531,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11215,6 +12545,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11231,17 +12562,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11255,11 +12598,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11270,12 +12615,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: 
object type: object automountServiceAccountToken: @@ -11287,10 +12634,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -11305,6 +12654,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11343,6 +12693,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11355,12 +12706,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -11371,6 +12726,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -11378,6 +12734,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -11392,6 +12749,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11409,6 +12767,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11421,6 +12780,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11442,6 +12809,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11459,6 +12827,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11471,6 +12840,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11493,6 +12870,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ 
-11523,6 +12901,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11597,6 +12976,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11627,6 +13007,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11717,16 +13098,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -11782,6 +13174,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11812,6 +13205,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11874,6 +13268,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -11885,6 +13282,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -11894,18 +13293,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -11915,10 +13321,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -11931,10 
+13339,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -11949,6 +13359,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11987,6 +13398,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11999,12 +13411,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12015,6 +13431,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12022,6 +13439,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12036,6 +13454,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12053,6 +13472,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12065,6 +13485,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12086,6 +13514,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12103,6 +13532,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12115,6 +13545,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12137,6 +13575,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12167,6 +13606,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -12241,6 +13681,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12271,6 +13712,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12361,16 +13803,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -12426,6 +13879,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12456,6 +13910,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12520,6 +13975,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -12531,6 +13989,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -12540,12 +14000,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -12553,10 +14019,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -12571,10 +14043,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic 
type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -12582,10 +14058,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -12600,6 +14078,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12638,6 +14117,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12650,12 +14130,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12666,6 +14150,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12673,6 +14158,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12687,6 +14173,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12704,6 +14191,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12716,6 +14204,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12737,6 +14233,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12754,6 +14251,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12766,6 +14264,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12788,6 +14294,7 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12818,6 +14325,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12892,6 +14400,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12922,6 +14431,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13012,16 +14522,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -13077,6 +14598,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13107,6 +14629,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13169,6 +14692,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -13180,6 +14706,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -13189,12 +14717,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -13233,6 +14767,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -13272,6 +14807,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + 
appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -13310,6 +14854,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -13322,6 +14867,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -13363,6 +14909,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -13379,11 +14926,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -13472,6 +15021,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -13481,6 +15031,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13498,6 +15049,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13526,7 +15078,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13541,6 +15095,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13596,6 +15151,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -13637,6 +15193,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -13666,18 +15223,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: 
additionalProperties: anyOf: @@ -13708,11 +15253,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -13721,6 +15268,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -13743,10 +15292,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -13763,6 +15314,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13843,11 +15395,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13909,6 +15463,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -13926,7 +15519,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13972,6 +15567,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -13990,7 +15586,9 @@ spec: - path type: object 
type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -14010,6 +15608,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -14041,6 +15640,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -14048,6 +15648,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14070,6 +15671,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14108,6 +15710,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -14122,6 +15725,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14147,6 +15751,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -14237,11 +15844,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -14253,11 +15862,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -14268,6 +15879,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -14284,11 +15896,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -14300,14 +15914,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - 
operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -14333,17 +15950,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -14357,11 +15986,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -14372,6 +16003,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -14385,6 +16017,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -14401,17 +16034,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -14425,11 +16070,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -14440,12 
+16087,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -14467,17 +16116,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -14491,11 +16152,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -14506,6 +16169,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -14519,6 +16183,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -14535,17 +16200,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -14559,11 +16236,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: 
additionalProperties: type: string @@ -14574,12 +16253,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -14591,10 +16272,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -14609,6 +16292,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -14647,6 +16331,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -14659,12 +16344,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -14675,6 +16364,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -14682,6 +16372,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -14696,6 +16387,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -14713,6 +16405,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -14725,6 +16418,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -14746,6 +16447,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -14763,6 +16465,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -14775,6 +16478,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: 
+ format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -14797,6 +16508,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -14827,6 +16539,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -14901,6 +16614,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -14931,6 +16645,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15021,16 +16736,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -15086,6 +16812,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -15116,6 +16843,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15178,6 +16906,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -15189,6 +16920,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -15198,18 +16931,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: 
properties: @@ -15219,10 +16959,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -15235,10 +16977,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -15253,6 +16997,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -15291,6 +17036,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -15303,12 +17049,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -15319,6 +17069,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -15326,6 +17077,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -15340,6 +17092,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -15357,6 +17110,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15369,6 +17123,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -15390,6 +17152,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -15407,6 +17170,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15419,6 +17183,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: 
object tcpSocket: properties: host: @@ -15441,6 +17213,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -15471,6 +17244,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15545,6 +17319,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -15575,6 +17350,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15665,16 +17441,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -15730,6 +17517,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -15760,6 +17548,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -15824,6 +17613,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -15835,6 +17627,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -15844,12 +17638,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -15857,10 +17657,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object 
type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -15875,10 +17681,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -15886,10 +17696,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -15904,6 +17716,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -15942,6 +17755,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -15954,12 +17768,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -15970,6 +17788,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -15977,6 +17796,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -15991,6 +17811,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -16008,6 +17829,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16020,6 +17842,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -16041,6 +17871,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -16058,6 +17889,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16070,6 +17902,14 @@ spec: 
required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -16092,6 +17932,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -16122,6 +17963,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16196,6 +18038,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -16226,6 +18069,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16316,16 +18160,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -16381,6 +18236,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -16411,6 +18267,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -16473,6 +18330,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -16484,6 +18344,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -16493,12 +18355,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -16537,6 
+18405,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -16576,6 +18445,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -16614,6 +18492,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -16626,6 +18505,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -16667,6 +18547,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -16683,11 +18564,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -16776,6 +18659,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -16785,6 +18669,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -16802,6 +18687,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -16830,7 +18716,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -16845,6 +18733,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -16900,6 +18789,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -16941,6 +18831,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -16970,18 +18861,6 @@ spec: 
type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -17012,11 +18891,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -17025,6 +18906,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -17047,10 +18930,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -17067,6 +18952,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17147,11 +19033,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17213,6 +19101,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -17230,7 +19157,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" 
type: string optional: type: boolean @@ -17276,6 +19205,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -17294,7 +19224,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -17314,6 +19246,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -17345,6 +19278,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -17352,6 +19286,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17374,6 +19309,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17412,6 +19348,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -17426,6 +19363,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -17451,6 +19389,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -17516,11 +19457,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -17532,11 +19475,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -17547,6 +19492,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -17563,11 +19509,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - 
operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -17579,14 +19527,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -17612,17 +19563,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -17636,11 +19599,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -17651,6 +19616,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -17664,6 +19630,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -17680,17 +19647,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -17704,11 +19683,13 @@ 
spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -17719,12 +19700,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -17746,17 +19729,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -17770,11 +19765,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -17785,6 +19782,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -17798,6 +19796,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -17814,17 +19813,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: 
matchExpressions: @@ -17838,11 +19849,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -17853,12 +19866,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -17870,10 +19885,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -17888,6 +19905,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -17926,6 +19944,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -17938,12 +19957,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -17954,6 +19977,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -17961,6 +19985,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -17975,6 +20000,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -17992,6 +20018,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18004,6 +20031,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -18025,6 +20060,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ 
-18042,6 +20078,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18054,6 +20091,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -18076,6 +20121,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18106,6 +20152,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18180,6 +20227,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18210,6 +20258,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18300,16 +20349,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -18365,6 +20425,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18395,6 +20456,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18457,6 +20519,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -18468,6 +20533,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -18477,18 +20544,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - 
name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -18498,10 +20572,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -18514,10 +20590,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -18532,6 +20610,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -18570,6 +20649,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -18582,12 +20662,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -18598,6 +20682,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -18605,6 +20690,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -18619,6 +20705,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -18636,6 +20723,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18648,6 +20736,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -18669,6 +20765,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -18686,6 +20783,7 @@ spec: - value type: object type: array + 
x-kubernetes-list-type: atomic path: type: string port: @@ -18698,6 +20796,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -18720,6 +20826,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18750,6 +20857,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18824,6 +20932,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -18854,6 +20963,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -18944,16 +21054,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -19009,6 +21130,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -19039,6 +21161,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19103,6 +21226,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -19114,6 +21240,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -19123,12 +21251,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - 
name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -19136,10 +21270,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -19154,10 +21294,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -19165,10 +21309,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -19183,6 +21329,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -19221,6 +21368,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -19233,12 +21381,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -19249,6 +21401,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -19256,6 +21409,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -19270,6 +21424,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -19287,6 +21442,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19299,6 +21455,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -19320,6 +21484,7 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -19337,6 +21502,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19349,6 +21515,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -19371,6 +21545,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -19401,6 +21576,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19475,6 +21651,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -19505,6 +21682,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19595,16 +21773,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -19660,6 +21849,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -19690,6 +21880,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -19752,6 +21943,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -19763,6 +21957,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -19772,12 +21968,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + 
x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -19816,6 +22018,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -19855,6 +22058,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -19893,6 +22105,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -19905,6 +22118,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -19946,6 +22160,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -19962,11 +22177,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -20055,6 +22272,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -20064,6 +22282,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20081,6 +22300,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20109,7 +22329,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -20124,6 +22346,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20179,6 +22402,7 @@ spec: - path type: object type: array + 
x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -20220,6 +22444,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -20249,18 +22474,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -20291,11 +22504,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -20304,6 +22519,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -20326,10 +22543,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -20346,6 +22565,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20426,11 +22646,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20492,6 +22714,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: 
+ type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -20509,7 +22770,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -20555,6 +22818,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -20573,7 +22837,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -20593,6 +22859,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -20624,6 +22891,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -20631,6 +22899,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20653,6 +22922,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20691,6 +22961,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -20705,6 +22976,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -20730,6 +23002,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -20819,6 +23094,9 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: diff --git a/ray-operator/config/crd/bases/ray.io_rayservices.yaml b/ray-operator/config/crd/bases/ray.io_rayservices.yaml index b3f8b901858..9d0fd9628d7 100644 --- a/ray-operator/config/crd/bases/ray.io_rayservices.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayservices.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: 
CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.13.0 + controller-gen.kubebuilder.io/version: v0.15.0 name: rayservices.ray.io spec: group: ray.io @@ -38,6 +38,8 @@ spec: deploymentUnhealthySecondThreshold: format: int32 type: integer + excludeHeadPodFromServeSvc: + type: boolean rayClusterConfig: properties: autoscalerOptions: @@ -56,6 +58,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -94,6 +97,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -112,6 +116,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -122,6 +127,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -171,16 +177,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -245,6 +262,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -257,6 +276,135 @@ spec: type: object enableInTreeAutoscaling: type: boolean + gcsFaultToleranceOptions: + properties: + externalStorageNamespace: + type: string + redisAddress: + type: string + redisPassword: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + 
containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + redisUsername: + properties: + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + required: + - redisAddress + type: object headGroupSpec: properties: enableIngress: @@ -301,6 +449,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -325,6 +474,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -372,6 +522,8 @@ spec: type: integer type: object type: object + 
trafficDistribution: + type: string type: type: string type: object @@ -425,6 +577,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -446,6 +600,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -501,11 +656,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -517,11 +674,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -532,6 +691,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -548,11 +708,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -564,14 +726,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -597,17 +762,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -621,11 +798,13 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -636,6 +815,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -649,6 +829,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -665,17 +846,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -689,11 +882,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -704,12 +899,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -731,17 +928,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -755,11 +964,13 @@ 
spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -770,6 +981,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -783,6 +995,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -799,17 +1012,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -823,11 +1048,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -838,12 +1065,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -855,10 +1084,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -873,6 +1104,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -911,6 +1143,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -923,12 +1156,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + 
x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -939,6 +1176,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -946,6 +1184,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -960,6 +1199,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -977,6 +1217,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -989,6 +1230,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1010,6 +1259,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1027,6 +1277,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1039,6 +1290,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1061,6 +1320,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1091,6 +1351,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1165,6 +1426,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1195,6 +1457,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1285,16 +1548,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object 
capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -1350,6 +1624,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1380,6 +1655,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1442,6 +1718,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -1453,6 +1732,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -1462,18 +1743,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -1483,10 +1771,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -1499,10 +1789,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -1517,6 +1809,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1555,6 +1848,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -1567,12 +1861,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string 
optional: type: boolean @@ -1583,6 +1881,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -1590,6 +1889,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -1604,6 +1904,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1621,6 +1922,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1633,6 +1935,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1654,6 +1964,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -1671,6 +1982,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1683,6 +1995,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -1705,6 +2025,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1735,6 +2056,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1809,6 +2131,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -1839,6 +2162,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -1929,16 +2253,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: 
type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -1994,6 +2329,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2024,6 +2360,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2088,6 +2425,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2099,6 +2439,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2108,12 +2450,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -2121,10 +2469,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -2139,10 +2493,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -2150,10 +2508,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -2168,6 +2528,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2206,6 +2567,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -2218,12 +2580,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map 
envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -2234,6 +2600,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -2241,6 +2608,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -2255,6 +2623,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2272,6 +2641,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2284,6 +2654,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2305,6 +2683,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -2322,6 +2701,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2334,6 +2714,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -2356,6 +2744,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2386,6 +2775,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2460,6 +2850,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2490,6 +2881,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2580,16 +2972,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: 
add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -2645,6 +3048,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -2675,6 +3079,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -2737,6 +3142,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -2748,6 +3156,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -2757,12 +3167,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -2801,6 +3217,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -2840,6 +3257,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -2878,6 +3304,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -2890,6 +3317,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -2931,6 +3359,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -2947,11 +3376,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array 
+ x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3040,6 +3471,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -3049,6 +3481,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3066,6 +3499,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3094,7 +3528,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3109,6 +3545,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3164,6 +3601,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -3205,6 +3643,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -3234,18 +3673,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -3276,11 +3703,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3289,6 +3718,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -3311,10 +3742,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -3331,6 +3764,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: 
atomic @@ -3411,11 +3845,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3477,6 +3913,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -3494,7 +3969,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3540,6 +4017,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -3558,7 +4036,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -3578,6 +4058,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -3609,6 +4090,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -3616,6 +4098,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3638,6 +4121,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3676,6 +4160,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -3690,6 +4175,7 @@ spec: 
secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -3715,6 +4201,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -3727,6 +4216,14 @@ spec: additionalProperties: type: string type: object + managedBy: + type: string + x-kubernetes-validations: + - message: the managedBy field is immutable + rule: self == oldSelf + - message: the managedBy field value must be either 'ray.io/kuberay-operator' + or 'kueue.x-k8s.io/multikueue' + rule: self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue'] rayVersion: type: string suspend: @@ -3736,6 +4233,9 @@ spec: properties: groupName: type: string + idleTimeoutSeconds: + format: int32 + type: integer maxReplicas: default: 2147483647 format: int32 @@ -3763,6 +4263,8 @@ spec: type: string type: array type: object + suspend: + type: boolean template: properties: metadata: @@ -3809,11 +4311,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3825,11 +4329,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -3840,6 +4346,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -3856,11 +4363,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -3872,14 +4381,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object 
x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -3905,17 +4417,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -3929,11 +4453,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -3944,6 +4470,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -3957,6 +4484,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -3973,17 +4501,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -3997,11 +4537,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4012,12 +4554,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic 
topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -4039,17 +4583,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4063,11 +4619,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4078,6 +4636,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -4091,6 +4650,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -4107,17 +4667,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -4131,11 +4703,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -4146,12 +4720,14 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -4163,10 +4739,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4181,6 +4759,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4219,6 +4798,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4231,12 +4811,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4247,6 +4831,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4254,6 +4839,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4268,6 +4854,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4285,6 +4872,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4297,6 +4885,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4318,6 +4914,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4335,6 +4932,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4347,6 +4945,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4369,6 +4975,7 @@ 
spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4399,6 +5006,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4473,6 +5081,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4503,6 +5112,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4593,16 +5203,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -4658,6 +5279,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -4688,6 +5310,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4750,6 +5373,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -4761,6 +5387,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -4770,18 +5398,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -4791,10 +5426,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string 
type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -4807,10 +5444,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -4825,6 +5464,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4863,6 +5503,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -4875,12 +5516,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -4891,6 +5536,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -4898,6 +5544,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -4912,6 +5559,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4929,6 +5577,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4941,6 +5590,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -4962,6 +5619,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -4979,6 +5637,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -4991,6 +5650,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5013,6 +5680,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 
@@ -5043,6 +5711,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5117,6 +5786,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5147,6 +5817,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5237,16 +5908,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5302,6 +5984,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5332,6 +6015,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5396,6 +6080,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -5407,6 +6094,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -5416,12 +6105,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -5429,10 +6124,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -5447,10 +6148,14 @@ spec: items: properties: name: + default: "" type: string 
type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -5458,10 +6163,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -5476,6 +6183,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5514,6 +6222,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -5526,12 +6235,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -5542,6 +6255,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -5549,6 +6263,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -5563,6 +6278,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5580,6 +6296,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5592,6 +6309,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5613,6 +6338,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -5630,6 +6356,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5642,6 +6369,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -5664,6 +6399,7 @@ spec: items: type: string type: array 
+ x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5694,6 +6430,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5768,6 +6505,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5798,6 +6536,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -5888,16 +6627,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -5953,6 +6703,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -5983,6 +6734,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -6045,6 +6797,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -6056,6 +6811,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -6065,12 +6822,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -6109,6 +6872,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -6148,6 +6912,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + 
localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -6186,6 +6959,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -6198,6 +6972,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -6239,6 +7014,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -6255,11 +7031,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6348,6 +7126,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -6357,6 +7136,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6374,6 +7154,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6402,7 +7183,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6417,6 +7200,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6472,6 +7256,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -6513,6 +7298,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -6542,18 +7328,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -6584,11 +7358,13 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -6597,6 +7373,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -6619,10 +7397,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -6639,6 +7419,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6719,11 +7500,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6785,6 +7568,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -6802,7 +7624,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -6848,6 +7672,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -6866,7 +7691,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string 
optional: type: boolean @@ -6886,6 +7713,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -6917,6 +7745,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -6924,6 +7753,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6946,6 +7776,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -6984,6 +7815,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -6998,6 +7830,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -7023,6 +7856,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -7080,6 +7916,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -7104,6 +7941,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -7151,6 +7989,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -7204,6 +8044,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -7225,12 +8067,18 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object serviceUnhealthySecondThreshold: format: int32 type: integer + upgradeStrategy: + properties: + type: + type: string + type: object type: object status: properties: @@ -7267,6 +8115,45 @@ spec: availableWorkerReplicas: format: int32 type: integer + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + maxLength: 32768 + type: 
string + observedGeneration: + format: int64 + minimum: 0 + type: integer + reason: + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + enum: + - "True" + - "False" + - Unknown + type: string + type: + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map desiredCPU: anyOf: - type: integer @@ -7302,8 +8189,12 @@ spec: properties: podIP: type: string + podName: + type: string serviceIP: type: string + serviceName: + type: string type: object lastUpdateTime: format: date-time @@ -7318,10 +8209,18 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: type: string + stateTransitionTimes: + additionalProperties: + format: date-time + type: string + type: object type: object type: object lastUpdateTime: @@ -7366,6 +8265,45 @@ spec: availableWorkerReplicas: format: int32 type: integer + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + maxLength: 32768 + type: string + observedGeneration: + format: int64 + minimum: 0 + type: integer + reason: + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + enum: + - "True" + - "False" + - Unknown + type: string + type: + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map desiredCPU: anyOf: - type: integer @@ -7401,8 +8339,12 @@ spec: 
properties: podIP: type: string + podName: + type: string serviceIP: type: string + serviceName: + type: string type: object lastUpdateTime: format: date-time @@ -7417,10 +8359,18 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: type: string + stateTransitionTimes: + additionalProperties: + format: date-time + type: string + type: object type: object type: object serviceStatus: @@ -7464,6 +8414,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7502,6 +8453,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -7520,6 +8472,7 @@ spec: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -7530,6 +8483,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -7579,16 +8533,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -7653,6 +8618,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -7709,6 +8676,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -7733,6 +8701,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -7780,6 +8749,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -7833,6 +8804,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -7854,6 +8827,7 @@ spec: 
x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -7909,11 +8883,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7925,11 +8901,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -7940,6 +8918,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -7956,11 +8935,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -7972,14 +8953,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -8005,17 +8989,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -8029,11 +9025,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: 
type: string @@ -8044,6 +9042,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -8057,6 +9056,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -8073,17 +9073,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -8097,11 +9109,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -8112,12 +9126,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -8139,17 +9155,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -8163,11 +9191,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: 
additionalProperties: type: string @@ -8178,6 +9208,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -8191,6 +9222,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -8207,17 +9239,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -8231,11 +9275,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -8246,12 +9292,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -8263,10 +9311,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8281,6 +9331,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8319,6 +9370,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8331,12 +9383,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8347,6 +9403,7 @@ spec: 
secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8354,6 +9411,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -8368,6 +9426,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8385,6 +9444,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8397,6 +9457,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8418,6 +9486,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -8435,6 +9504,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8447,6 +9517,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -8469,6 +9547,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8499,6 +9578,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8573,6 +9653,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8603,6 +9684,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8693,16 +9775,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: 
atomic type: object privileged: type: boolean @@ -8758,6 +9851,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -8788,6 +9882,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -8850,6 +9945,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -8861,6 +9959,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -8870,18 +9970,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -8891,10 +9998,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -8907,10 +10016,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -8925,6 +10036,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8963,6 +10075,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -8975,12 +10088,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -8991,6 +10108,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -8998,6 +10116,7 @@ 
spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -9012,6 +10131,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9029,6 +10149,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9041,6 +10162,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9062,6 +10191,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9079,6 +10209,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9091,6 +10222,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9113,6 +10252,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9143,6 +10283,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9217,6 +10358,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9247,6 +10389,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9337,16 +10480,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -9402,6 +10556,7 @@ spec: items: type: string 
type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9432,6 +10587,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9496,6 +10652,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -9507,6 +10666,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -9516,12 +10677,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -9529,10 +10696,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -9547,10 +10720,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -9558,10 +10735,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -9576,6 +10755,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -9614,6 +10794,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -9626,12 +10807,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -9642,6 +10827,7 @@ spec: 
secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -9649,6 +10835,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -9663,6 +10850,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9680,6 +10868,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9692,6 +10881,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9713,6 +10910,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -9730,6 +10928,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9742,6 +10941,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -9764,6 +10971,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9794,6 +11002,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9868,6 +11077,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -9898,6 +11108,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -9988,16 +11199,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + 
x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -10053,6 +11275,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -10083,6 +11306,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -10145,6 +11369,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -10156,6 +11383,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -10165,12 +11394,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -10209,6 +11444,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -10248,6 +11484,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -10286,6 +11531,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -10298,6 +11544,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -10339,6 +11586,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -10355,11 +11603,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ 
-10448,6 +11698,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -10457,6 +11708,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10474,6 +11726,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10502,7 +11755,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10517,6 +11772,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10572,6 +11828,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -10613,6 +11870,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -10642,18 +11900,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -10684,11 +11930,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -10697,6 +11945,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -10719,10 +11969,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object flexVolume: properties: @@ -10739,6 +11991,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10819,11 +12072,13 @@ spec: items: type: string 
type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -10885,6 +12140,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -10902,7 +12196,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10948,6 +12244,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -10966,7 +12263,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -10986,6 +12285,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -11017,6 +12317,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -11024,6 +12325,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -11046,6 +12348,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -11084,6 +12387,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -11098,6 +12402,7 @@ spec: secretRef: properties: name: + default: "" 
type: string type: object x-kubernetes-map-type: atomic @@ -11123,6 +12428,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -11213,11 +12521,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -11229,11 +12539,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic weight: @@ -11244,6 +12556,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: properties: nodeSelectorTerms: @@ -11260,11 +12573,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchFields: items: properties: @@ -11276,14 +12591,17 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-type: atomic required: - nodeSelectorTerms type: object @@ -11309,17 +12627,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11333,11 +12663,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - 
key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11348,6 +12680,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11361,6 +12694,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11377,17 +12711,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11401,11 +12747,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11416,12 +12764,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object podAntiAffinity: properties: @@ -11443,17 +12793,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11467,11 +12829,13 @@ spec: items: type: string type: array + 
x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11482,6 +12846,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: @@ -11495,6 +12860,7 @@ spec: - weight type: object type: array + x-kubernetes-list-type: atomic requiredDuringSchedulingIgnoredDuringExecution: items: properties: @@ -11511,17 +12877,29 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string type: object type: object x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic namespaceSelector: properties: matchExpressions: @@ -11535,11 +12913,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -11550,12 +12930,14 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic topologyKey: type: string required: - topologyKey type: object type: array + x-kubernetes-list-type: atomic type: object type: object automountServiceAccountToken: @@ -11567,10 +12949,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -11585,6 +12969,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11623,6 +13008,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -11635,12 +13021,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: 
map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -11651,6 +13041,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -11658,6 +13049,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -11672,6 +13064,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11689,6 +13082,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11701,6 +13095,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11722,6 +13124,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -11739,6 +13142,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11751,6 +13155,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -11773,6 +13185,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11803,6 +13216,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11877,6 +13291,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -11907,6 +13322,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -11997,16 +13413,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: 
object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -12062,6 +13489,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12092,6 +13520,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12154,6 +13583,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -12165,6 +13597,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -12174,18 +13608,25 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map dnsConfig: properties: nameservers: items: type: string type: array + x-kubernetes-list-type: atomic options: items: properties: @@ -12195,10 +13636,12 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic searches: items: type: string type: array + x-kubernetes-list-type: atomic type: object dnsPolicy: type: string @@ -12211,10 +13654,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -12229,6 +13674,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12267,6 +13713,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12279,12 +13726,16 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + 
default: "" type: string optional: type: boolean @@ -12295,6 +13746,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12302,6 +13754,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12316,6 +13769,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12333,6 +13787,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12345,6 +13800,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12366,6 +13829,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12383,6 +13847,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12395,6 +13860,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -12417,6 +13890,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12447,6 +13921,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12521,6 +13996,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12551,6 +14027,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12641,16 +14118,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array 
+ x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -12706,6 +14194,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -12736,6 +14225,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12800,6 +14290,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -12811,6 +14304,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -12820,12 +14315,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map hostAliases: items: properties: @@ -12833,10 +14334,16 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ip: type: string + required: + - ip type: object type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map hostIPC: type: boolean hostNetwork: @@ -12851,10 +14358,14 @@ spec: items: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map initContainers: items: properties: @@ -12862,10 +14373,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic command: items: type: string type: array + x-kubernetes-list-type: atomic env: items: properties: @@ -12880,6 +14393,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12918,6 +14432,7 @@ spec: key: type: string name: + default: "" type: string optional: type: boolean @@ -12930,12 +14445,16 @@ spec: - name type: object type: array + 
x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map envFrom: items: properties: configMapRef: properties: name: + default: "" type: string optional: type: boolean @@ -12946,6 +14465,7 @@ spec: secretRef: properties: name: + default: "" type: string optional: type: boolean @@ -12953,6 +14473,7 @@ spec: x-kubernetes-map-type: atomic type: object type: array + x-kubernetes-list-type: atomic image: type: string imagePullPolicy: @@ -12967,6 +14488,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -12984,6 +14506,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -12996,6 +14519,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -13017,6 +14548,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object httpGet: properties: @@ -13034,6 +14566,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13046,6 +14579,14 @@ spec: required: - port type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object tcpSocket: properties: host: @@ -13068,6 +14609,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13098,6 +14640,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13172,6 +14715,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13202,6 +14746,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13292,16 +14837,27 @@ spec: properties: allowPrivilegeEscalation: type: boolean + appArmorProfile: + properties: + localhostProfile: + type: 
string + type: + type: string + required: + - type + type: object capabilities: properties: add: items: type: string type: array + x-kubernetes-list-type: atomic drop: items: type: string type: array + x-kubernetes-list-type: atomic type: object privileged: type: boolean @@ -13357,6 +14913,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic type: object failureThreshold: format: int32 @@ -13387,6 +14944,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic path: type: string port: @@ -13449,6 +15007,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map volumeMounts: items: properties: @@ -13460,6 +15021,8 @@ spec: type: string readOnly: type: boolean + recursiveReadOnly: + type: string subPath: type: string subPathExpr: @@ -13469,12 +15032,18 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map workingDir: type: string required: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map nodeName: type: string nodeSelector: @@ -13513,6 +15082,7 @@ spec: - conditionType type: object type: array + x-kubernetes-list-type: atomic resourceClaims: items: properties: @@ -13552,6 +15122,15 @@ spec: x-kubernetes-list-type: map securityContext: properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object fsGroup: format: int64 type: integer @@ -13590,6 +15169,7 @@ spec: format: int64 type: integer type: array + x-kubernetes-list-type: atomic sysctls: items: properties: @@ -13602,6 +15182,7 @@ spec: - value type: object type: array + x-kubernetes-list-type: atomic windowsOptions: properties: gmsaCredentialSpec: @@ -13643,6 +15224,7 @@ spec: type: string type: object type: array + x-kubernetes-list-type: atomic topologySpreadConstraints: items: properties: @@ -13659,11 +15241,13 @@ spec: 
items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -13752,6 +15336,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic path: type: string readOnly: @@ -13761,6 +15346,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13778,6 +15364,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13806,7 +15393,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -13821,6 +15410,7 @@ spec: nodePublishSecretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -13876,6 +15466,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object emptyDir: properties: @@ -13917,6 +15508,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic dataSource: properties: apiGroup: @@ -13946,18 +15538,6 @@ spec: type: object resources: properties: - claims: - items: - properties: - name: - type: string - required: - - name - type: object - type: array - x-kubernetes-list-map-keys: - - name - x-kubernetes-list-type: map limits: additionalProperties: anyOf: @@ -13988,11 +15568,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator type: object type: array + x-kubernetes-list-type: atomic matchLabels: additionalProperties: type: string @@ -14001,6 +15583,8 @@ spec: x-kubernetes-map-type: atomic storageClassName: type: string + volumeAttributesClassName: + type: string volumeMode: type: string volumeName: @@ -14023,10 +15607,12 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic wwids: items: type: string type: array + x-kubernetes-list-type: atomic type: object 
flexVolume: properties: @@ -14043,6 +15629,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14123,11 +15710,13 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic readOnly: type: boolean secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14189,6 +15778,45 @@ spec: sources: items: properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object configMap: properties: items: @@ -14206,7 +15834,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -14252,6 +15882,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic type: object secret: properties: @@ -14270,7 +15901,9 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic name: + default: "" type: string optional: type: boolean @@ -14290,6 +15923,7 @@ spec: type: object type: object type: array + x-kubernetes-list-type: atomic type: object quobyte: properties: @@ -14321,6 +15955,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic pool: type: string readOnly: @@ -14328,6 +15963,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14350,6 +15986,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: 
atomic @@ -14388,6 +16025,7 @@ spec: - path type: object type: array + x-kubernetes-list-type: atomic optional: type: boolean secretName: @@ -14402,6 +16040,7 @@ spec: secretRef: properties: name: + default: "" type: string type: object x-kubernetes-map-type: atomic @@ -14427,6 +16066,9 @@ spec: - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - containers type: object @@ -14484,6 +16126,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic externalName: type: string externalTrafficPolicy: @@ -14508,6 +16151,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic ports: items: properties: @@ -14555,6 +16199,8 @@ spec: type: integer type: object type: object + trafficDistribution: + type: string type: type: string type: object @@ -14608,6 +16254,8 @@ spec: type: string ip: type: string + ipMode: + type: string ports: items: properties: @@ -14629,6 +16277,7 @@ spec: x-kubernetes-list-type: atomic type: object type: array + x-kubernetes-list-type: atomic type: object type: object type: object @@ -14722,6 +16371,9 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: @@ -14818,6 +16470,9 @@ spec: observedGeneration: format: int64 type: integer + readyWorkerReplicas: + format: int32 + type: integer reason: type: string state: diff --git a/ray-operator/config/crd/kustomization.yaml b/ray-operator/config/crd/kustomization.yaml index 707c2e5d1c0..ebcf033aaa4 100644 --- a/ray-operator/config/crd/kustomization.yaml +++ b/ray-operator/config/crd/kustomization.yaml @@ -5,4 +5,5 @@ resources: - bases/ray.io_rayclusters.yaml - bases/ray.io_rayservices.yaml - bases/ray.io_rayjobs.yaml -# +kubebuilder:scaffold:crdkustomizeresource +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization diff --git a/ray-operator/config/default-with-webhooks/kustomization.yaml 
b/ray-operator/config/default-with-webhooks/kustomization.yaml index d11134b6ccb..e8e9dfd6326 100644 --- a/ray-operator/config/default-with-webhooks/kustomization.yaml +++ b/ray-operator/config/default-with-webhooks/kustomization.yaml @@ -19,9 +19,6 @@ images: newName: quay.io/kuberay/operator newTag: nightly -patchesStrategicMerge: -- manager_webhook_patch.yaml -- webhookcainjection_patch.yaml replacements: - source: @@ -125,3 +122,6 @@ resources: - ../webhook - ../certmanager - namespace.yaml +patches: +- path: manager_webhook_patch.yaml +- path: webhookcainjection_patch.yaml diff --git a/ray-operator/config/default-with-webhooks/namespace.yaml b/ray-operator/config/default-with-webhooks/namespace.yaml index 54b94d016af..f51701dfe02 100644 --- a/ray-operator/config/default-with-webhooks/namespace.yaml +++ b/ray-operator/config/default-with-webhooks/namespace.yaml @@ -4,4 +4,3 @@ metadata: labels: control-plane: ray-operator name: ray-system - diff --git a/ray-operator/config/default/kustomization.yaml b/ray-operator/config/default/kustomization.yaml index 770ef275ef2..c06eafe6440 100644 --- a/ray-operator/config/default/kustomization.yaml +++ b/ray-operator/config/default/kustomization.yaml @@ -1,5 +1,5 @@ # Adds namespace to all resources. -namespace: ray-system +namespace: default # Value of this field is prepended to the # names of all resources, e.g. a deployment named @@ -12,11 +12,6 @@ namespace: ray-system #commonLabels: # someName: someValue -bases: -- ../crd -- ../rbac -- ../manager -- namespace.yaml # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. 
#- ../prometheus @@ -24,4 +19,9 @@ images: - name: kuberay/operator newName: quay.io/kuberay/operator newTag: nightly - +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- ../crd +- ../rbac +- ../manager diff --git a/ray-operator/config/default/namespace.yaml b/ray-operator/config/default/namespace.yaml deleted file mode 100644 index 54b94d016af..00000000000 --- a/ray-operator/config/default/namespace.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - labels: - control-plane: ray-operator - name: ray-system - diff --git a/ray-operator/config/manager/kustomization.yaml b/ray-operator/config/manager/kustomization.yaml index 6ba801db9a7..0a9f52dedcb 100644 --- a/ray-operator/config/manager/kustomization.yaml +++ b/ray-operator/config/manager/kustomization.yaml @@ -4,6 +4,8 @@ resources: apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -commonLabels: - app.kubernetes.io/component: kuberay-operator - app.kubernetes.io/name: kuberay +labels: +- includeSelectors: true + pairs: + app.kubernetes.io/component: kuberay-operator + app.kubernetes.io/name: kuberay diff --git a/ray-operator/config/manager/manager.yaml b/ray-operator/config/manager/manager.yaml index b22780fca7e..93473aeba26 100644 --- a/ray-operator/config/manager/manager.yaml +++ b/ray-operator/config/manager/manager.yaml @@ -26,7 +26,8 @@ spec: containers: - command: - /manager -# args: + args: + - --feature-gates=RayClusterStatusConditions=true # this argument can be removed for version >= v1.3 where the feature gate is enabled by default. 
# - --enable-leader-election image: kuberay/operator imagePullPolicy: IfNotPresent diff --git a/ray-operator/config/rbac/kustomization.yaml b/ray-operator/config/rbac/kustomization.yaml index d09b2e3e297..6bf65d44f67 100644 --- a/ray-operator/config/rbac/kustomization.yaml +++ b/ray-operator/config/rbac/kustomization.yaml @@ -5,6 +5,10 @@ resources: - leader_election_role_binding.yaml - service_account.yaml -commonLabels: - app.kubernetes.io/name: kuberay - app.kubernetes.io/component: kuberay-operator +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +labels: +- includeSelectors: true + pairs: + app.kubernetes.io/component: kuberay-operator + app.kubernetes.io/name: kuberay diff --git a/ray-operator/config/rbac/leader_election_role.yaml b/ray-operator/config/rbac/leader_election_role.yaml index 81132016932..6f32a1b5163 100644 --- a/ray-operator/config/rbac/leader_election_role.yaml +++ b/ray-operator/config/rbac/leader_election_role.yaml @@ -16,14 +16,6 @@ rules: - update - patch - delete -- apiGroups: - - "" - resources: - - configmaps/status - verbs: - - get - - update - - patch - apiGroups: - "" resources: diff --git a/ray-operator/config/rbac/role.yaml b/ray-operator/config/rbac/role.yaml index c15fdd9bfcc..613df521452 100644 --- a/ray-operator/config/rbac/role.yaml +++ b/ray-operator/config/rbac/role.yaml @@ -32,6 +32,7 @@ rules: verbs: - get - list + - watch - apiGroups: - "" resources: @@ -57,6 +58,14 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - pods/proxy + verbs: + - get + - patch + - update - apiGroups: - "" resources: @@ -91,6 +100,15 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - services/proxy + verbs: + - create + - get + - patch + - update - apiGroups: - "" resources: diff --git a/ray-operator/config/rbac/role_binding.yaml b/ray-operator/config/rbac/role_binding.yaml index 7de37e7df0a..42e453340db 100644 --- a/ray-operator/config/rbac/role_binding.yaml +++ 
b/ray-operator/config/rbac/role_binding.yaml @@ -8,4 +8,4 @@ roleRef: name: kuberay-operator subjects: - kind: ServiceAccount - name: kuberay-operator \ No newline at end of file + name: kuberay-operator diff --git a/ray-operator/config/samples/ingress-rayclient-tls.yaml b/ray-operator/config/samples/ingress-rayclient-tls.yaml index 7e3042fd093..3de85442af9 100644 --- a/ray-operator/config/samples/ingress-rayclient-tls.yaml +++ b/ray-operator/config/samples/ingress-rayclient-tls.yaml @@ -18,4 +18,3 @@ spec: number: 10001 path: / pathType: Prefix - diff --git a/ray-operator/config/samples/pytorch-mnist/ray-job.pytorch-mnist.yaml b/ray-operator/config/samples/pytorch-mnist/ray-job.pytorch-mnist.yaml new file mode 100644 index 00000000000..15747aae1e3 --- /dev/null +++ b/ray-operator/config/samples/pytorch-mnist/ray-job.pytorch-mnist.yaml @@ -0,0 +1,60 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: rayjob-pytorch-mnist +spec: + shutdownAfterJobFinishes: false + entrypoint: python ray-operator/config/samples/pytorch-mnist/ray_train_pytorch_mnist.py + runtimeEnvYAML: | + pip: + - torch + - torchvision + working_dir: "https://github.com/ray-project/kuberay/archive/master.zip" + env_vars: + NUM_WORKERS: "2" + CPUS_PER_WORKER: "2" + + # rayClusterSpec specifies the RayCluster instance to be created by the RayJob controller. 
+ rayClusterSpec: + rayVersion: '2.9.0' + headGroupSpec: + rayStartParams: {} + # Pod template + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 # Ray dashboard + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + memory: "4Gi" + requests: + cpu: "1" + memory: "4Gi" + workerGroupSpecs: + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + groupName: small-group + rayStartParams: {} + # Pod template + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.9.0 + resources: + limits: + cpu: "3" + memory: "4Gi" + requests: + cpu: "3" + memory: "4Gi" diff --git a/ray-operator/config/samples/pytorch-mnist/ray_train_pytorch_mnist.py b/ray-operator/config/samples/pytorch-mnist/ray_train_pytorch_mnist.py new file mode 100644 index 00000000000..2d0844ccb86 --- /dev/null +++ b/ray-operator/config/samples/pytorch-mnist/ray_train_pytorch_mnist.py @@ -0,0 +1,165 @@ +""" +Reference: https://docs.ray.io/en/master/train/examples/pytorch/torch_fashion_mnist_example.html + +This script is a modified version of the original PyTorch Fashion MNIST +example. It uses only CPU resources to train the MNIST model. See +`ScalingConfig` for more details. 
+""" +import os +from typing import Dict + +import torch +from filelock import FileLock +from torch import nn +from torch.utils.data import DataLoader +from torchvision import datasets, transforms +from torchvision.transforms import Normalize, ToTensor +from tqdm import tqdm + +import ray.train +from ray.train import ScalingConfig +from ray.train.torch import TorchTrainer + + +def get_dataloaders(batch_size): + # Transform to normalize the input images + transform = transforms.Compose([ToTensor(), Normalize((0.5,), (0.5,))]) + + with FileLock(os.path.expanduser("~/data.lock")): + # Download training data from open datasets + training_data = datasets.FashionMNIST( + root="~/data", + train=True, + download=True, + transform=transform, + ) + + # Download test data from open datasets + test_data = datasets.FashionMNIST( + root="~/data", + train=False, + download=True, + transform=transform, + ) + + # Create data loaders + train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True) + test_dataloader = DataLoader(test_data, batch_size=batch_size) + + return train_dataloader, test_dataloader + + +# Model Definition +class NeuralNetwork(nn.Module): + def __init__(self): + super(NeuralNetwork, self).__init__() + self.flatten = nn.Flatten() + self.linear_relu_stack = nn.Sequential( + nn.Linear(28 * 28, 512), + nn.ReLU(), + nn.Dropout(0.25), + nn.Linear(512, 512), + nn.ReLU(), + nn.Dropout(0.25), + nn.Linear(512, 10), + nn.ReLU(), + ) + + def forward(self, x): + x = self.flatten(x) + logits = self.linear_relu_stack(x) + return logits + + +def train_func_per_worker(config: Dict): + lr = config["lr"] + epochs = config["epochs"] + batch_size = config["batch_size_per_worker"] + + # Get dataloaders inside the worker training function + train_dataloader, test_dataloader = get_dataloaders(batch_size=batch_size) + + # [1] Prepare Dataloader for distributed training + # Shard the datasets among workers and move batches to the correct device + # 
======================================================================= + train_dataloader = ray.train.torch.prepare_data_loader(train_dataloader) + test_dataloader = ray.train.torch.prepare_data_loader(test_dataloader) + + model = NeuralNetwork() + + # [2] Prepare and wrap your model with DistributedDataParallel + # Move the model to the correct GPU/CPU device + # ============================================================ + model = ray.train.torch.prepare_model(model) + + loss_fn = nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9) + + # Model training loop + for epoch in range(epochs): + if ray.train.get_context().get_world_size() > 1: + # Required for the distributed sampler to shuffle properly across epochs. + train_dataloader.sampler.set_epoch(epoch) + + model.train() + for X, y in tqdm(train_dataloader, desc=f"Train Epoch {epoch}"): + pred = model(X) + loss = loss_fn(pred, y) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + model.eval() + test_loss, num_correct, num_total = 0, 0, 0 + with torch.no_grad(): + for X, y in tqdm(test_dataloader, desc=f"Test Epoch {epoch}"): + pred = model(X) + loss = loss_fn(pred, y) + + test_loss += loss.item() + num_total += y.shape[0] + num_correct += (pred.argmax(1) == y).sum().item() + + test_loss /= len(test_dataloader) + accuracy = num_correct / num_total + + # [3] Report metrics to Ray Train + # =============================== + ray.train.report(metrics={"loss": test_loss, "accuracy": accuracy}) + + +def train_fashion_mnist(num_workers=4, cpus_per_worker=2, use_gpu=False): + global_batch_size = 32 + + train_config = { + "lr": 1e-3, + "epochs": 10, + "batch_size_per_worker": global_batch_size // num_workers, + } + + # Configure computation resources + scaling_config = ScalingConfig( + num_workers=num_workers, + use_gpu=use_gpu, + resources_per_worker={"CPU": cpus_per_worker} + ) + + # Initialize a Ray TorchTrainer + trainer = TorchTrainer( + 
train_loop_per_worker=train_func_per_worker, + train_loop_config=train_config, + scaling_config=scaling_config, + ) + + # [4] Start distributed training + # Run `train_func_per_worker` on all workers + # ============================================= + result = trainer.fit() + print(f"Training result: {result}") + + +if __name__ == "__main__": + num_workers = int(os.getenv("NUM_WORKERS", "4")) + cpus_per_worker = int(os.getenv("CPUS_PER_WORKER", "2")) + train_fashion_mnist(num_workers=num_workers, cpus_per_worker=cpus_per_worker) diff --git a/ray-operator/config/samples/pytorch-resnet-image-classifier/fine-tune-pytorch-resnet-image-classifier.py b/ray-operator/config/samples/pytorch-resnet-image-classifier/fine-tune-pytorch-resnet-image-classifier.py new file mode 100644 index 00000000000..f43e7d66087 --- /dev/null +++ b/ray-operator/config/samples/pytorch-resnet-image-classifier/fine-tune-pytorch-resnet-image-classifier.py @@ -0,0 +1,225 @@ +import os +import warnings +from tempfile import TemporaryDirectory + +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader +from torchvision import datasets, models, transforms +import numpy as np + +import ray.train as train +from ray.train.torch import TorchTrainer +from ray.train import ScalingConfig, RunConfig, CheckpointConfig, Checkpoint + +# Data augmentation and normalization for training +# Just normalization for validation +data_transforms = { + "train": transforms.Compose( + [ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ] + ), + "val": transforms.Compose( + [ + transforms.Resize(224), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ] + ), +} + +def download_datasets(): + os.system( + "wget https://download.pytorch.org/tutorial/hymenoptera_data.zip >/dev/null 
2>&1" + ) + os.system("unzip hymenoptera_data.zip >/dev/null 2>&1") + +# Download and build torch datasets +def build_datasets(): + torch_datasets = {} + for split in ["train", "val"]: + torch_datasets[split] = datasets.ImageFolder( + os.path.join("./hymenoptera_data", split), data_transforms[split] + ) + return torch_datasets + +def initialize_model(): + # Load pretrained model params + model = models.resnet50(pretrained=True) + + # Replace the original classifier with a new Linear layer + num_features = model.fc.in_features + model.fc = nn.Linear(num_features, 2) + + # Ensure all params get updated during finetuning + for param in model.parameters(): + param.requires_grad = True + return model + + +def evaluate(logits, labels): + _, preds = torch.max(logits, 1) + corrects = torch.sum(preds == labels).item() + return corrects + +train_loop_config = { + "input_size": 224, # Input image size (224 x 224) + "batch_size": 32, # Batch size for training + "num_epochs": 10, # Number of epochs to train for + "lr": 0.001, # Learning Rate + "momentum": 0.9, # SGD optimizer momentum +} + +def train_loop_per_worker(configs): + warnings.filterwarnings("ignore") + + # Calculate the batch size for a single worker + worker_batch_size = configs["batch_size"] + + # Download dataset once on local rank 0 worker + if train.get_context().get_local_rank() == 0: + download_datasets() + torch.distributed.barrier() + + # Build datasets on each worker + torch_datasets = build_datasets() + + # Prepare dataloader for each worker + dataloaders = dict() + dataloaders["train"] = DataLoader( + torch_datasets["train"], batch_size=worker_batch_size, shuffle=True + ) + dataloaders["val"] = DataLoader( + torch_datasets["val"], batch_size=worker_batch_size, shuffle=False + ) + + # Distribute + dataloaders["train"] = train.torch.prepare_data_loader(dataloaders["train"]) + dataloaders["val"] = train.torch.prepare_data_loader(dataloaders["val"]) + + device = train.torch.get_device() + + # Prepare DDP 
Model, optimizer, and loss function. + model = initialize_model() + + # Reload from checkpoint if exists. + start_epoch = 0 + checkpoint = train.get_checkpoint() + if checkpoint: + with checkpoint.as_directory() as checkpoint_dir: + state_dict = torch.load(os.path.join(checkpoint_dir, "checkpoint.pt")) + model.load_state_dict(state_dict['model']) + + start_epoch = state_dict['epoch'] + 1 + + model = train.torch.prepare_model(model) + + optimizer = optim.SGD( + model.parameters(), lr=configs["lr"], momentum=configs["momentum"] + ) + criterion = nn.CrossEntropyLoss() + + # Start training loops + for epoch in range(start_epoch, configs["num_epochs"]): + # Each epoch has a training and validation phase + for phase in ["train", "val"]: + if phase == "train": + model.train() # Set model to training mode + else: + model.eval() # Set model to evaluate mode + + running_loss = 0.0 + running_corrects = 0 + + if train.get_context().get_world_size() > 1: + dataloaders[phase].sampler.set_epoch(epoch) + + for inputs, labels in dataloaders[phase]: + inputs = inputs.to(device) + labels = labels.to(device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + with torch.set_grad_enabled(phase == "train"): + # Get model outputs and calculate loss + outputs = model(inputs) + loss = criterion(outputs, labels) + + # backward + optimize only if in training phase + if phase == "train": + loss.backward() + optimizer.step() + + # calculate statistics + running_loss += loss.item() * inputs.size(0) + running_corrects += evaluate(outputs, labels) + + size = len(torch_datasets[phase]) + epoch_loss = running_loss / size + epoch_acc = running_corrects / size + + if train.get_context().get_world_rank() == 0: + print( + "Epoch {}-{} Loss: {:.4f} Acc: {:.4f}".format( + epoch, phase, epoch_loss, epoch_acc + ) + ) + + # Report metrics and checkpoint every epoch + if phase == "val": + with TemporaryDirectory() as tmpdir: + state_dict = { + "epoch": epoch, + "model": 
model.module.state_dict(), + "optimizer_state_dict": optimizer.state_dict(), + } + + # In standard DDP training, where the model is the same across all ranks, + # only the global rank 0 worker needs to save and report the checkpoint + if train.get_context().get_world_rank() == 0: + torch.save(state_dict, os.path.join(tmpdir, "checkpoint.pt")) + + train.report( + metrics={"loss": epoch_loss, "acc": epoch_acc}, + checkpoint=Checkpoint.from_directory(tmpdir), + ) + +if __name__ == "__main__": + num_workers = int(os.environ.get("NUM_WORKERS", "4")) + scaling_config = ScalingConfig( + num_workers=num_workers, use_gpu=True, resources_per_worker={"CPU": 1, "GPU": 1} + ) + + checkpoint_config = CheckpointConfig(num_to_keep=3) + run_config = RunConfig( + name="finetune-resnet", + storage_path="/mnt/cluster_storage", + checkpoint_config=checkpoint_config, + ) + + experiment_path = os.path.expanduser("/mnt/cluster_storage/finetune-resnet") + if TorchTrainer.can_restore(experiment_path): + trainer = TorchTrainer.restore(experiment_path, + train_loop_per_worker=train_loop_per_worker, + train_loop_config=train_loop_config, + scaling_config=scaling_config, + run_config=run_config, + ) + else: + trainer = TorchTrainer( + train_loop_per_worker=train_loop_per_worker, + train_loop_config=train_loop_config, + scaling_config=scaling_config, + run_config=run_config, + ) + + result = trainer.fit() + print(result) diff --git a/ray-operator/config/samples/pytorch-resnet-image-classifier/ray-job.pytorch-image-classifier.yaml b/ray-operator/config/samples/pytorch-resnet-image-classifier/ray-job.pytorch-image-classifier.yaml new file mode 100644 index 00000000000..bb3826df872 --- /dev/null +++ b/ray-operator/config/samples/pytorch-resnet-image-classifier/ray-job.pytorch-image-classifier.yaml @@ -0,0 +1,111 @@ +# This RayJob is based on the "Finetuning a Pytorch Image Classifier with Ray Train" example in the Ray documentation. 
+# See https://docs.ray.io/en/latest/train/examples/pytorch/pytorch_resnet_finetune.html for more details. +apiVersion: ray.io/v1 +kind: RayJob +metadata: + generateName: pytorch-image-classifier- +spec: + shutdownAfterJobFinishes: true + entrypoint: python ray-operator/config/samples/pytorch-resnet-image-classifier/fine-tune-pytorch-resnet-image-classifier.py + runtimeEnvYAML: | + pip: + - numpy + - datasets + - torch + - torchvision + - transformers>=4.19.1 + working_dir: "https://github.com/ray-project/kuberay/archive/master.zip" + rayClusterSpec: + rayVersion: '2.9.0' + headGroupSpec: + rayStartParams: + dashboard-host: '0.0.0.0' + template: + metadata: + annotations: + gke-gcsfuse/volumes: "true" + gke-gcsfuse/cpu-limit: "0" + gke-gcsfuse/memory-limit: 5Gi + gke-gcsfuse/ephemeral-storage-limit: 10Gi + spec: + serviceAccountName: pytorch-distributed-training + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + env: + - name: NUM_WORKERS + value: "4" + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + memory: "8G" + requests: + cpu: "1" + memory: "8G" + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + - mountPath: /mnt/cluster_storage + name: cluster-storage + volumes: + - name: ray-logs + emptyDir: {} + - name: cluster-storage + csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + bucketName: GCS_BUCKET + mountOptions: "implicit-dirs,uid=1000,gid=100" + workerGroupSpecs: + - replicas: 4 + minReplicas: 4 + maxReplicas: 4 + groupName: gpu-group + rayStartParams: + dashboard-host: '0.0.0.0' + template: + metadata: + annotations: + gke-gcsfuse/volumes: "true" + gke-gcsfuse/cpu-limit: "0" + gke-gcsfuse/memory-limit: 5Gi + gke-gcsfuse/ephemeral-storage-limit: 10Gi + spec: + serviceAccountName: pytorch-distributed-training + tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" + containers: + 
- name: ray-worker + image: rayproject/ray-ml:2.9.0-gpu + resources: + limits: + cpu: "1" + memory: "8G" + nvidia.com/gpu: "1" + requests: + cpu: "1" + memory: "8G" + nvidia.com/gpu: "1" + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + - mountPath: /mnt/cluster_storage + name: cluster-storage + volumes: + - name: ray-logs + emptyDir: {} + - name: cluster-storage + csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + bucketName: GCS_BUCKET + mountOptions: "implicit-dirs,uid=1000,gid=100" diff --git a/ray-operator/config/samples/pytorch-text-classifier/fine-tune-pytorch-text-classifier.py b/ray-operator/config/samples/pytorch-text-classifier/fine-tune-pytorch-text-classifier.py index e2b4791bad2..0617deb03cb 100644 --- a/ray-operator/config/samples/pytorch-text-classifier/fine-tune-pytorch-text-classifier.py +++ b/ray-operator/config/samples/pytorch-text-classifier/fine-tune-pytorch-text-classifier.py @@ -144,7 +144,7 @@ def train_func(config): trainer = TorchTrainer( train_loop_per_worker=train_func, train_loop_config=train_func_config, - scaling_config=scaling_config, + scaling_config=scaling_config, run_config=run_config, datasets={"train": train_dataset, "validation": validation_dataset}, # <- Feed the Ray Datasets here ) diff --git a/ray-operator/config/samples/ray-cluster-alb-ingress.yaml b/ray-operator/config/samples/ray-cluster-alb-ingress.yaml index 603299eae80..034b0a20958 100644 --- a/ray-operator/config/samples/ray-cluster-alb-ingress.yaml +++ b/ray-operator/config/samples/ray-cluster-alb-ingress.yaml @@ -9,7 +9,7 @@ metadata: alb.ingress.kubernetes.io/scheme: internal alb.ingress.kubernetes.io/tags: Environment=dev,Team=test # See `ingress.md` for more details about how to choose subnets. 
- alb.ingress.kubernetes.io/subnets: subnet-0930d6b677fb40a74, subnet-0066ab2e15925618c + alb.ingress.kubernetes.io/subnets: subnet-0930d6b677fb40a74, subnet-0066ab2e15925618c alb.ingress.kubernetes.io/target-type: ip spec: ingressClassName: alb diff --git a/ray-operator/config/samples/ray-cluster.auth.yaml b/ray-operator/config/samples/ray-cluster.auth.yaml new file mode 100644 index 00000000000..e1f52680652 --- /dev/null +++ b/ray-operator/config/samples/ray-cluster.auth.yaml @@ -0,0 +1,135 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kube-rbac-proxy +data: + config-file.yaml: |+ + authorization: + resourceAttributes: + namespace: default + apiVersion: v1 + apiGroup: ray.io + resource: rayclusters + name: ray-cluster-with-auth +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-rbac-proxy +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kube-rbac-proxy +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-rbac-proxy +subjects: +- kind: ServiceAccount + name: kube-rbac-proxy + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kube-rbac-proxy +rules: +- apiGroups: ["authentication.k8s.io"] + resources: + - tokenreviews + verbs: ["create"] +- apiGroups: ["authorization.k8s.io"] + resources: + - subjectaccessreviews + verbs: ["create"] +--- +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: ray-cluster-with-auth +spec: + headGroupSpec: + rayStartParams: + dashboard-host: '127.0.0.1' + dashboard-port: '8443' + template: + metadata: + spec: + serviceAccountName: kube-rbac-proxy + containers: + - name: ray-head + image: rayproject/ray:2.39.0 + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "2" + memory: "4Gi" + requests: + cpu: "2" + memory: "4Gi" + readinessProbe: + exec: + command: + - bash + - -c + - 
wget -T 2 -q -O- http://localhost:52365/api/local_raylet_healthz | grep + success && wget -T 10 -q -O- http://localhost:8443/api/gcs_healthz | grep + success + failureThreshold: 10 + initialDelaySeconds: 10 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 2 + livenessProbe: + exec: + command: + - bash + - -c + - wget -T 2 -q -O- http://localhost:52365/api/local_raylet_healthz | grep + success && wget -T 10 -q -O- http://localhost:8443/api/gcs_healthz | grep + success + failureThreshold: 120 + initialDelaySeconds: 30 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 2 + - name: kube-rbac-proxy + image: quay.io/brancz/kube-rbac-proxy:v0.18.1 + args: + - "--insecure-listen-address=0.0.0.0:8265" + - "--upstream=http://127.0.0.1:8443/" + - "--config-file=/etc/kube-rbac-proxy/config-file.yaml" + - "--logtostderr=true" + volumeMounts: + - name: config + mountPath: /etc/kube-rbac-proxy + volumes: + - name: config + configMap: + name: kube-rbac-proxy + workerGroupSpecs: + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + groupName: worker-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.39.0 + resources: + limits: + cpu: 1 + memory: "4Gi" + requests: + cpu: 1 + memory: "4Gi" diff --git a/ray-operator/config/samples/ray-cluster.autoscaler-v2.yaml b/ray-operator/config/samples/ray-cluster.autoscaler-v2.yaml index c5a22232bf9..f5ab4579871 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler-v2.yaml +++ b/ray-operator/config/samples/ray-cluster.autoscaler-v2.yaml @@ -41,10 +41,6 @@ spec: name: dashboard - containerPort: 10001 name: client - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml b/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml index 7176f3a2d35..2f021f6044a 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml +++ 
b/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml @@ -91,10 +91,6 @@ spec: name: dashboard - containerPort: 10001 name: client - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 1 @@ -139,7 +135,3 @@ spec: requests: cpu: 14 memory: 54Gi - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] diff --git a/ray-operator/config/samples/ray-cluster.autoscaler.yaml b/ray-operator/config/samples/ray-cluster.autoscaler.yaml index 91332a84f57..dc74807ca57 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler.yaml +++ b/ray-operator/config/samples/ray-cluster.autoscaler.yaml @@ -63,10 +63,6 @@ spec: name: dashboard - containerPort: 10001 name: client - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: "1" @@ -109,10 +105,6 @@ spec: containers: - name: ray-worker image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-cluster.complete.large.yaml b/ray-operator/config/samples/ray-cluster.complete.large.yaml index 7b79e98649d..d43e2b51ad7 100644 --- a/ray-operator/config/samples/ray-cluster.complete.large.yaml +++ b/ray-operator/config/samples/ray-cluster.complete.large.yaml @@ -52,10 +52,6 @@ spec: name: dashboard - containerPort: 10001 name: client - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] volumeMounts: - mountPath: /tmp/ray name: ray-logs @@ -100,10 +96,6 @@ spec: requests: cpu: 14 memory: 54Gi - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] # use volumeMounts.Optional. 
# Refer to https://kubernetes.io/docs/concepts/storage/volumes/ volumeMounts: diff --git a/ray-operator/config/samples/ray-cluster.complete.yaml b/ray-operator/config/samples/ray-cluster.complete.yaml index cce5c7a4036..6c245cdcbac 100644 --- a/ray-operator/config/samples/ray-cluster.complete.yaml +++ b/ray-operator/config/samples/ray-cluster.complete.yaml @@ -38,10 +38,6 @@ spec: name: dashboard - containerPort: 10001 name: client - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] volumeMounts: - mountPath: /tmp/ray name: ray-logs @@ -92,10 +88,6 @@ spec: containers: - name: ray-worker image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] # use volumeMounts.Optional. # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ volumeMounts: diff --git a/ray-operator/config/samples/ray-cluster.embed-grafana.yaml b/ray-operator/config/samples/ray-cluster.embed-grafana.yaml index 345225e6a18..cea43ccb8b2 100644 --- a/ray-operator/config/samples/ray-cluster.embed-grafana.yaml +++ b/ray-operator/config/samples/ray-cluster.embed-grafana.yaml @@ -32,10 +32,6 @@ spec: name: as-metrics # autoscaler - containerPort: 44227 name: dash-metrics # dashboard - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] volumeMounts: - mountPath: /tmp/ray name: ray-logs @@ -69,10 +65,6 @@ spec: containers: - name: ray-worker image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] volumeMounts: - mountPath: /tmp/ray name: ray-logs diff --git a/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml b/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml index c9a0d2c4d85..17496d09680 100644 --- a/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml +++ b/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml @@ -1,77 +1,3 @@ -kind: ConfigMap -apiVersion: v1 -metadata: - name: redis-config - labels: - app: redis -data: - 
redis.conf: |- - dir /data - port 6379 - bind 0.0.0.0 - appendonly yes - protected-mode no - requirepass 5241590000000000 - pidfile /data/redis-6379.pid ---- -apiVersion: v1 -kind: Service -metadata: - name: redis - labels: - app: redis -spec: - type: ClusterIP - ports: - - name: redis - port: 6379 - selector: - app: redis ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: redis - labels: - app: redis -spec: - replicas: 1 - selector: - matchLabels: - app: redis - template: - metadata: - labels: - app: redis - spec: - containers: - - name: redis - image: redis:5.0.8 - command: - - "sh" - - "-c" - - "redis-server /usr/local/etc/redis/redis.conf" - ports: - - containerPort: 6379 - volumeMounts: - - name: config - mountPath: /usr/local/etc/redis/redis.conf - subPath: redis.conf - volumes: - - name: config - configMap: - name: redis-config ---- -# Redis password -apiVersion: v1 -kind: Secret -metadata: - name: redis-password-secret -type: Opaque -data: - # echo -n "5241590000000000" | base64 - password: NTI0MTU5MDAwMDAwMDAwMA== ---- apiVersion: ray.io/v1 kind: RayCluster metadata: @@ -171,6 +97,80 @@ spec: - name: ray-logs emptyDir: {} --- +kind: ConfigMap +apiVersion: v1 +metadata: + name: redis-config + labels: + app: redis +data: + redis.conf: |- + dir /data + port 6379 + bind 0.0.0.0 + appendonly yes + protected-mode no + requirepass 5241590000000000 + pidfile /data/redis-6379.pid +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + type: ClusterIP + ports: + - name: redis + port: 6379 + selector: + app: redis +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + app: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis:5.0.8 + command: + - "sh" + - "-c" + - "redis-server /usr/local/etc/redis/redis.conf" + ports: + - containerPort: 6379 + volumeMounts: + - name: config + 
mountPath: /usr/local/etc/redis/redis.conf + subPath: redis.conf + volumes: + - name: config + configMap: + name: redis-config +--- +# Redis password +apiVersion: v1 +kind: Secret +metadata: + name: redis-password-secret +type: Opaque +data: + # echo -n "5241590000000000" | base64 + password: NTI0MTU5MDAwMDAwMDAwMA== +--- apiVersion: v1 kind: ConfigMap metadata: diff --git a/ray-operator/config/samples/ray-cluster.external-redis.yaml b/ray-operator/config/samples/ray-cluster.external-redis.yaml index 6abdf547a08..00acf96689a 100644 --- a/ray-operator/config/samples/ray-cluster.external-redis.yaml +++ b/ray-operator/config/samples/ray-cluster.external-redis.yaml @@ -1,77 +1,3 @@ -kind: ConfigMap -apiVersion: v1 -metadata: - name: redis-config - labels: - app: redis -data: - redis.conf: |- - dir /data - port 6379 - bind 0.0.0.0 - appendonly yes - protected-mode no - requirepass 5241590000000000 - pidfile /data/redis-6379.pid ---- -apiVersion: v1 -kind: Service -metadata: - name: redis - labels: - app: redis -spec: - type: ClusterIP - ports: - - name: redis - port: 6379 - selector: - app: redis ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: redis - labels: - app: redis -spec: - replicas: 1 - selector: - matchLabels: - app: redis - template: - metadata: - labels: - app: redis - spec: - containers: - - name: redis - image: redis:5.0.8 - command: - - "sh" - - "-c" - - "redis-server /usr/local/etc/redis/redis.conf" - ports: - - containerPort: 6379 - volumeMounts: - - name: config - mountPath: /usr/local/etc/redis/redis.conf - subPath: redis.conf - volumes: - - name: config - configMap: - name: redis-config ---- -# Redis password -apiVersion: v1 -kind: Secret -metadata: - name: redis-password-secret -type: Opaque -data: - # echo -n "5241590000000000" | base64 - password: NTI0MTU5MDAwMDAwMDAwMA== ---- apiVersion: ray.io/v1 kind: RayCluster metadata: @@ -171,6 +97,80 @@ spec: - name: ray-logs emptyDir: {} --- +kind: ConfigMap +apiVersion: v1 +metadata: + 
name: redis-config + labels: + app: redis +data: + redis.conf: |- + dir /data + port 6379 + bind 0.0.0.0 + appendonly yes + protected-mode no + requirepass 5241590000000000 + pidfile /data/redis-6379.pid +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + type: ClusterIP + ports: + - name: redis + port: 6379 + selector: + app: redis +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + app: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis:5.0.8 + command: + - "sh" + - "-c" + - "redis-server /usr/local/etc/redis/redis.conf" + ports: + - containerPort: 6379 + volumeMounts: + - name: config + mountPath: /usr/local/etc/redis/redis.conf + subPath: redis.conf + volumes: + - name: config + configMap: + name: redis-config +--- +# Redis password +apiVersion: v1 +kind: Secret +metadata: + name: redis-password-secret +type: Opaque +data: + # echo -n "5241590000000000" | base64 + password: NTI0MTU5MDAwMDAwMDAwMA== +--- apiVersion: v1 kind: ConfigMap metadata: diff --git a/ray-operator/config/samples/ray-cluster.fluentbit.yaml b/ray-operator/config/samples/ray-cluster.fluentbit.yaml new file mode 100644 index 00000000000..9b05b5894f2 --- /dev/null +++ b/ray-operator/config/samples/ray-cluster.fluentbit.yaml @@ -0,0 +1,82 @@ +--- +# RayCluster CR with a FluentBit sidecar +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: raycluster-fluentbit-sidecar-logs +spec: + rayVersion: '2.9.0' + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + # This config is meant for demonstration purposes only. + # Use larger Ray containers in production! 
+ resources: + limits: + cpu: 1 + memory: 2Gi + requests: + cpu: 500m + memory: 1Gi + # Share logs with Fluent Bit + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + # Fluent Bit sidecar + - name: fluentbit + image: fluent/fluent-bit:3.2.2 + # Get Kubernetes metadata via downward API + env: + - name: POD_LABELS + valueFrom: + fieldRef: + fieldPath: metadata.labels['ray.io/cluster'] + # These resource requests for Fluent Bit should be sufficient in production. + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 100m + memory: 128Mi + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + - mountPath: /fluent-bit/etc/fluent-bit.conf + subPath: fluent-bit.conf + name: fluentbit-config + # Log and config volumes + volumes: + - name: ray-logs + emptyDir: {} + - name: fluentbit-config + configMap: + name: fluentbit-config +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluentbit-config +data: + fluent-bit.conf: | + [INPUT] + Name tail + Path /tmp/ray/session_latest/logs/* + Tag ray + Path_Key true + Refresh_Interval 5 + [FILTER] + Name modify + Match ray + Add POD_LABELS ${POD_LABELS} + [OUTPUT] + Name loki + Match * + Host loki-gateway + Port 80 + Labels RayCluster=${POD_LABELS} + tenant_id test diff --git a/ray-operator/config/samples/ray-cluster.heterogeneous.yaml b/ray-operator/config/samples/ray-cluster.heterogeneous.yaml index 89d44037788..cbf845b3133 100644 --- a/ray-operator/config/samples/ray-cluster.heterogeneous.yaml +++ b/ray-operator/config/samples/ray-cluster.heterogeneous.yaml @@ -1,27 +1,3 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: ray-code -data: - sample_code.py: | - import ray - print("trying to connect to Ray!") - ray.init() - print("now executing some code with Ray!") - import time - start = time.time() - @ray.remote - def f(): - time.sleep(0.01) - return ray._private.services.get_node_ip_address() - values=set(ray.get([f.remote() for _ in range(1000)])) - print("Ray Nodes: ",str(values)) - file = 
open("/tmp/ray_nodes.txt","a") - file.write("available nodes: %s\n" % str(values)) - file.close() - end = time.time() - print("Execution time = ",end - start) ---- # The resource requests and limits in this config are too small for production! # For examples with more realistic resource configuration, see # ray-cluster.complete.large.yaml and @@ -140,3 +116,27 @@ spec: - name: ray-logs emptyDir: {} ######################status################################# +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ray-code +data: + sample_code.py: | + import ray + print("trying to connect to Ray!") + ray.init() + print("now executing some code with Ray!") + import time + start = time.time() + @ray.remote + def f(): + time.sleep(0.01) + return ray._private.services.get_node_ip_address() + values=set(ray.get([f.remote() for _ in range(1000)])) + print("Ray Nodes: ",str(values)) + file = open("/tmp/ray_nodes.txt","a") + file.write("available nodes: %s\n" % str(values)) + file.close() + end = time.time() + print("Execution time = ",end - start) diff --git a/ray-operator/config/samples/ray-cluster.mini.yaml b/ray-operator/config/samples/ray-cluster.mini.yaml deleted file mode 100644 index 36de3081cb4..00000000000 --- a/ray-operator/config/samples/ray-cluster.mini.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# This example config does not specify resource requests or limits. -# For examples with more realistic resource configuration, see -# ray-cluster.complete.large.yaml and -# ray-cluster.autoscaler.large.yaml. -apiVersion: ray.io/v1 -kind: RayCluster -metadata: - labels: - controller-tools.k8s.io: "1.0" - # A unique identifier for the head node and workers of this cluster. - name: raycluster-mini -spec: - rayVersion: '2.9.0' # should match the Ray version in the image of the containers - # Ray head pod template - headGroupSpec: - # The `rayStartParams` are used to configure the `ray start` command. 
- # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. - # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. - rayStartParams: - dashboard-host: '0.0.0.0' - #pod template - template: - spec: - containers: - - name: ray-head - image: rayproject/ray:2.9.0 - resources: - limits: - cpu: 1 - memory: 2Gi - requests: - cpu: 500m - memory: 2Gi - ports: - - containerPort: 6379 - name: gcs-server - - containerPort: 8265 # Ray dashboard - name: dashboard - - containerPort: 10001 - name: client diff --git a/ray-operator/config/samples/ray-cluster.sample.yaml b/ray-operator/config/samples/ray-cluster.sample.yaml new file mode 100644 index 00000000000..139f10f3024 --- /dev/null +++ b/ray-operator/config/samples/ray-cluster.sample.yaml @@ -0,0 +1,54 @@ +# This example config does not specify resource requests or limits. +# For examples with more realistic resource configuration, see +# ray-cluster.complete.large.yaml and +# ray-cluster.autoscaler.large.yaml. 
+apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: raycluster-kuberay +spec: + rayVersion: '2.9.0' # should match the Ray version in the image of the containers + # Ray head pod template + headGroupSpec: + rayStartParams: {} + #pod template + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + resources: + limits: + cpu: 1 + memory: 2G + requests: + cpu: 1 + memory: 2G + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 # Ray dashboard + name: dashboard + - containerPort: 10001 + name: client + workerGroupSpecs: + # the pod replicas in this group typed worker + - replicas: 1 + minReplicas: 1 + maxReplicas: 5 + # logical group name, for this called small-group, also can be functional + groupName: workergroup + rayStartParams: {} + #pod template + template: + spec: + containers: + - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' + image: rayproject/ray:2.9.0 + resources: + limits: + cpu: 1 + memory: 1G + requests: + cpu: 1 + memory: 1G diff --git a/ray-operator/config/samples/ray-cluster.separate-ingress.yaml b/ray-operator/config/samples/ray-cluster.separate-ingress.yaml index 6cda8c4a5c4..35c39aa60dc 100644 --- a/ray-operator/config/samples/ray-cluster.separate-ingress.yaml +++ b/ray-operator/config/samples/ray-cluster.separate-ingress.yaml @@ -49,7 +49,6 @@ metadata: # $IP/$CLUSTER_NAME/#/actors rewrites to $IP/#/actors ($1 maps to #/actors) # $IP/$CLUSTER_NAME/#/node rewrites to $IP/#/node ($1 maps to #/node) name: raycluster-ingress-head-ingress - namespace: default spec: ingressClassName: nginx # This should be aligned with the ingress-class of the ingress controller rules: diff --git a/ray-operator/config/samples/ray-cluster.tls.yaml b/ray-operator/config/samples/ray-cluster.tls.yaml index 8c22beb34ca..f1ee3a3753e 100644 --- a/ray-operator/config/samples/ray-cluster.tls.yaml +++ 
b/ray-operator/config/samples/ray-cluster.tls.yaml @@ -79,10 +79,6 @@ spec: name: dashboard - containerPort: 10001 name: client - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] volumeMounts: - mountPath: /tmp/ray name: ray-logs @@ -160,10 +156,6 @@ spec: containers: - name: ray-worker image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] # use volumeMounts.Optional. # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ volumeMounts: diff --git a/ray-operator/config/samples/ray-cluster.tpu-v4-multihost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v4-multihost.yaml index 1dbac4ff1dc..6bb9ce8cb82 100644 --- a/ray-operator/config/samples/ray-cluster.tpu-v4-multihost.yaml +++ b/ray-operator/config/samples/ray-cluster.tpu-v4-multihost.yaml @@ -111,4 +111,3 @@ spec: cloud.google.com/gke-ray-node-type: worker app.kubernetes.io/name: kuberay app.kubernetes.io/instance: example-cluster - diff --git a/ray-operator/config/samples/ray-cluster.tpu-v4-singlehost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v4-singlehost.yaml index 4c73d71f1ce..7539d07893e 100644 --- a/ray-operator/config/samples/ray-cluster.tpu-v4-singlehost.yaml +++ b/ray-operator/config/samples/ray-cluster.tpu-v4-singlehost.yaml @@ -107,4 +107,3 @@ spec: cloud.google.com/gke-ray-node-type: worker app.kubernetes.io/name: kuberay app.kubernetes.io/instance: example-cluster - diff --git a/ray-operator/config/samples/ray-cluster.tpu-v6e-16-multihost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v6e-16-multihost.yaml new file mode 100644 index 00000000000..485d85d1de6 --- /dev/null +++ b/ray-operator/config/samples/ray-cluster.tpu-v6e-16-multihost.yaml @@ -0,0 +1,70 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: tpu-ray-cluster +spec: + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.37.0-py310 + imagePullPolicy: IfNotPresent + resources: + 
limits: + cpu: "8" + memory: 40G + requests: + cpu: "8" + memory: 40G + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + - containerPort: 8888 + name: grpc + workerGroupSpecs: + - groupName: tpu-group + replicas: 1 + minReplicas: 0 + maxReplicas: 1 + numOfHosts: 4 + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.37.0-py310 + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: "24" + google.com/tpu: "4" + memory: 200G + requests: + cpu: "24" + google.com/tpu: "4" + memory: 200G + env: + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: VBAR_CONTROL_SERVICE_URL + value: $(NODE_IP):8353 + - name: JAX_PLATFORMS + value: tpu,cpu + - name: ENABLE_PJRT_COMPATIBILITY + value: "true" + ports: + - containerPort: 8081 + name: mxla + nodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v6e-slice + cloud.google.com/gke-tpu-topology: 4x4 diff --git a/ray-operator/config/samples/ray-cluster.tpu-v6e-256-multihost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v6e-256-multihost.yaml new file mode 100644 index 00000000000..c3b28764260 --- /dev/null +++ b/ray-operator/config/samples/ray-cluster.tpu-v6e-256-multihost.yaml @@ -0,0 +1,70 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: tpu-ray-cluster +spec: + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.37.0-py310 + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: "8" + memory: 40G + requests: + cpu: "8" + memory: 40G + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + - containerPort: 8888 + name: grpc + workerGroupSpecs: + - groupName: tpu-group + replicas: 1 + minReplicas: 0 + maxReplicas: 1 + numOfHosts: 64 + 
rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.37.0-py310 + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: "24" + google.com/tpu: "4" + memory: 200G + requests: + cpu: "24" + google.com/tpu: "4" + memory: 200G + env: + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: VBAR_CONTROL_SERVICE_URL + value: $(NODE_IP):8353 + - name: JAX_PLATFORMS + value: tpu,cpu + - name: ENABLE_PJRT_COMPATIBILITY + value: "true" + ports: + - containerPort: 8081 + name: mxla + nodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v6e-slice + cloud.google.com/gke-tpu-topology: 16x16 diff --git a/ray-operator/config/samples/ray-cluster.tpu-v6e-singlehost.yaml b/ray-operator/config/samples/ray-cluster.tpu-v6e-singlehost.yaml new file mode 100644 index 00000000000..1199a20d343 --- /dev/null +++ b/ray-operator/config/samples/ray-cluster.tpu-v6e-singlehost.yaml @@ -0,0 +1,58 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: tpu-ray-cluster +spec: + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.37.0-py310 + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: "8" + memory: 40G + requests: + cpu: "8" + memory: 40G + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + - containerPort: 8081 + name: mxla + - containerPort: 8888 + name: grpc + workerGroupSpecs: + - groupName: tpu-group + replicas: 1 + minReplicas: 0 + maxReplicas: 1 + numOfHosts: 1 + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.37.0-py310 + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: "24" + google.com/tpu: "4" + memory: 200G + requests: + cpu: "24" + google.com/tpu: "4" + memory: 200G + nodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v6e-slice + 
cloud.google.com/gke-tpu-topology: 2x2 diff --git a/ray-operator/config/samples/ray-cluster.yunikorn-scheduler.yaml b/ray-operator/config/samples/ray-cluster.yunikorn-scheduler.yaml new file mode 100644 index 00000000000..b3ad5374256 --- /dev/null +++ b/ray-operator/config/samples/ray-cluster.yunikorn-scheduler.yaml @@ -0,0 +1,42 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: test-yunikorn-0 + labels: + ray.io/gang-scheduling-enabled: "true" + yunikorn.apache.org/app-id: test-yunikorn-0 + yunikorn.apache.org/queue: root.test +spec: + rayVersion: "2.9.0" + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + resources: + limits: + cpu: "1" + memory: "2Gi" + requests: + cpu: "1" + memory: "2Gi" + workerGroupSpecs: + - groupName: worker + rayStartParams: {} + replicas: 2 + minReplicas: 2 + maxReplicas: 2 + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + resources: + limits: + cpu: "1" + memory: "1Gi" + requests: + cpu: "1" + memory: "1Gi" diff --git a/ray-operator/config/samples/ray-data-image-resize/ray-data-image-resize-gcsfusecsi-job.yaml b/ray-operator/config/samples/ray-data-image-resize/ray-data-image-resize-gcsfusecsi-job.yaml new file mode 100644 index 00000000000..c32d75274a3 --- /dev/null +++ b/ray-operator/config/samples/ray-data-image-resize/ray-data-image-resize-gcsfusecsi-job.yaml @@ -0,0 +1,106 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: ray-data-image-resize-gcsfuse +spec: + entrypoint: python ray-operator/config/samples/ray-data-image-resize/ray_data_image_resize_gcsfuse.py + runtimeEnvYAML: | + pip: + - torch + - torchvision + - numpy + working_dir: "https://github.com/ray-project/kuberay/archive/master.zip" + env_vars: + BUCKET_PREFIX: images + shutdownAfterJobFinishes: true + ttlSecondsAfterFinished: 30 + rayClusterSpec: + headGroupSpec: + rayStartParams: + disable-usage-stats: 'true' + template: + metadata: + 
annotations: + gke-gcsfuse/cpu-limit: '0' + gke-gcsfuse/ephemeral-storage-limit: '0' + gke-gcsfuse/memory-limit: '0' + gke-gcsfuse/volumes: 'true' + spec: + containers: + - image: rayproject/ray:2.9.3 + name: ray-head + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + requests: + cpu: '1' + memory: 4Gi + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + - name: dshm + mountPath: /dev/shm + - mountPath: /data + name: gcs-fuse-csi-ephemeral + volumes: + - emptyDir: {} + name: ray-logs + - name: dshm + emptyDir: + medium: Memory + - csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + # replace the bucketName to the Google Cloud Storage bucket of your choice. For non-public bucket, ensure access control is setup for the pod by following https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/cloud-storage-fuse-csi-driver#authentication + bucketName: ray-images + mountOptions: implicit-dirs,anonymous-access,uid=1000,gid=100,metadata-cache:ttl-secs:-1,metadata-cache:stat-cache-max-size-mb:-1,metadata-cache:type-cache-max-size-mb:-1 + skipCSIBucketAccessCheck: 'true' + name: gcs-fuse-csi-ephemeral + rayVersion: 2.9.3 + workerGroupSpecs: + - groupName: worker-group + maxReplicas: 3 + minReplicas: 1 + rayStartParams: {} + replicas: 3 + template: + metadata: + annotations: + gke-gcsfuse/cpu-limit: '0' + gke-gcsfuse/ephemeral-storage-limit: '0' + gke-gcsfuse/memory-limit: '0' + gke-gcsfuse/volumes: 'true' + spec: + containers: + - image: rayproject/ray:2.9.3 + name: ray-worker + resources: + requests: + cpu: '1' + memory: 4Gi + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + - name: dshm + mountPath: /dev/shm + - mountPath: /data + name: gcs-fuse-csi-ephemeral + volumes: + - emptyDir: {} + name: ray-logs + - name: dshm + emptyDir: + medium: Memory + - csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + # replace the 
bucketName to the Google Cloud Storage bucket of your choice. For non-public bucket, ensure access control is setup for the pod by following https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/cloud-storage-fuse-csi-driver#authentication + bucketName: ray-images + mountOptions: implicit-dirs,anonymous-access,uid=1000,gid=100,metadata-cache:ttl-secs:-1,metadata-cache:stat-cache-max-size-mb:-1,metadata-cache:type-cache-max-size-mb:-1 + skipCSIBucketAccessCheck: 'true' + name: gcs-fuse-csi-ephemeral diff --git a/ray-operator/config/samples/ray-data-image-resize/ray-job.image-resize.yaml b/ray-operator/config/samples/ray-data-image-resize/ray-job.image-resize.yaml new file mode 100644 index 00000000000..84d2e954702 --- /dev/null +++ b/ray-operator/config/samples/ray-data-image-resize/ray-job.image-resize.yaml @@ -0,0 +1,61 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: image-resize +spec: + shutdownAfterJobFinishes: false + entrypoint: python ray-operator/config/samples/ray-data-image-resize/ray_data_image_resize.py + runtimeEnvYAML: | + pip: + - torch + - torchvision + - numpy + - google-cloud-storage + working_dir: "https://github.com/ray-project/kuberay/archive/master.zip" + env_vars: + BUCKET_NAME: ray-images + BUCKET_PREFIX: images + # rayClusterSpec specifies the RayCluster instance to be created by the RayJob controller. 
+ rayClusterSpec: + rayVersion: '2.9.0' + headGroupSpec: + rayStartParams: {} + # Pod template + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 # Ray dashboard + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "2" + memory: "4Gi" + requests: + cpu: "2" + memory: "4Gi" + workerGroupSpecs: + - replicas: 4 + minReplicas: 1 + maxReplicas: 5 + groupName: small-group + rayStartParams: {} + # Pod template + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.9.0 + resources: + limits: + cpu: "2" + memory: "4Gi" + requests: + cpu: "2" + memory: "4Gi" diff --git a/ray-operator/config/samples/ray-data-image-resize/ray_data_image_resize.py b/ray-operator/config/samples/ray-data-image-resize/ray_data_image_resize.py new file mode 100644 index 00000000000..7e61a2f6217 --- /dev/null +++ b/ray-operator/config/samples/ray-data-image-resize/ray_data_image_resize.py @@ -0,0 +1,65 @@ +from typing import Dict + +import numpy as np +import ray +import io +import os + +import torch +from torchvision import transforms + +from google.cloud import storage + +bucket_name = os.environ["BUCKET_NAME"] +prefix = os.environ["BUCKET_PREFIX"] +allowed_extensions = ('.png', '.jpg', '.jpeg', '.tif', '.tiff', '.bmp', '.gif') + +def list_blobs(bucket_name, prefix): + client = storage.Client().create_anonymous_client() + bucket = client.bucket(bucket_name) + blobs = bucket.list_blobs(prefix=prefix) + + blob_files = [] + for blob in blobs: + if blob.name.lower().endswith(allowed_extensions): + blob_files.append(blob.name) + return blob_files + +def download_blob(blob_name): + client = storage.Client().create_anonymous_client() + bucket = client.bucket(bucket_name) + blob = bucket.blob(blob_name['item']) + data = blob.download_as_bytes() + + from PIL import Image + Image.MAX_IMAGE_PIXELS = None + image = 
np.array(Image.open(io.BytesIO(data))) + + return {"image": image} + +def transform_frame(row: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: + transform = transforms.Compose( + [transforms.ToTensor(), transforms.Resize((256, 256)), transforms.ConvertImageDtype(torch.float)] + ) + row["image"] = transform(row["image"]) + return row + +def main(): + """ + This is a CPU-only job that reads images from + Google Cloud Storage and resizes them. + """ + ray.init() + + blobs = list_blobs(bucket_name, prefix) + dataset = ray.data.from_items(blobs) + dataset = dataset.map(download_blob) + dataset = dataset.map(transform_frame) + + dataset_iter = dataset.iter_batches(batch_size=None) + for _ in dataset_iter: + pass + + +if __name__ == "__main__": + main() diff --git a/ray-operator/config/samples/ray-data-image-resize/ray_data_image_resize_gcsfuse.py b/ray-operator/config/samples/ray-data-image-resize/ray_data_image_resize_gcsfuse.py new file mode 100644 index 00000000000..27804af399f --- /dev/null +++ b/ray-operator/config/samples/ray-data-image-resize/ray_data_image_resize_gcsfuse.py @@ -0,0 +1,73 @@ +from typing import Dict, List +import numpy as np +import ray +import os + +import torch +from torchvision import transforms +from PIL import Image + +allowed_extensions = ('.png', '.jpg', '.jpeg', '.tif', '.tiff', '.bmp', '.gif') +bucket_prefix = os.environ["BUCKET_PREFIX"] +prefix = "/data/" + bucket_prefix + +def find_image_files(directory): + image_files = [] + for root, dirs, files in os.walk(directory): + for file in files: + if file.lower().endswith(allowed_extensions): + # print ("found file ", file) + image_files.append(os.path.join(root, file)) + + return image_files + +class ReadImageFiles: + def __call__(self, text_batch: List[str]): + Image.MAX_IMAGE_PIXELS = None + text = text_batch['item'] + images = [] + for t in text: + a = np.array(Image.open(t)) + images.append(a) + + return {'results': list(zip(text, images))} + +class TransformImages: + def 
__init__(self): + self.transform = transforms.Compose( + [transforms.ToTensor(), transforms.Resize((256, 256)), transforms.ConvertImageDtype(torch.float)] + ) + def __call__(self, image_batch: Dict[str, List]): + images = image_batch['results'] + images_transformed = [] + # input is a tuple of (filepath str, image ndarray) + for t in images: + images_transformed.append(self.transform(t[1])) + + return {'results': images_transformed} + +def main(): + """ + This is a CPU-only job that reads images from a Google Cloud Storage bucket and resizes them. + The bucket is mounted as a volume to the underlying pod by the GKE GCSFuse CSI driver. + """ + ray.init() + print("Enumerate files in prefix ", prefix) + image_files = find_image_files(prefix) + print("For prefix ", prefix, " number of image_files", len(image_files)) + if len(image_files) == 0: + print ("no files to process") + return + + dataset = ray.data.from_items(image_files) + dataset = dataset.flat_map(lambda row: [{'item': row['item']}]) + dataset = dataset.map_batches(ReadImageFiles, batch_size=16, concurrency=2) + dataset = dataset.map_batches(TransformImages, batch_size=16, concurrency=2) + + dataset_iter = dataset.iter_batches(batch_size=None) + for _ in dataset_iter: + pass + print("done") + +if __name__ == "__main__": + main() diff --git a/ray-operator/config/samples/ray-job.batch-inference.yaml b/ray-operator/config/samples/ray-job.batch-inference.yaml index a66764e81a1..208c57ee411 100644 --- a/ray-operator/config/samples/ray-job.batch-inference.yaml +++ b/ray-operator/config/samples/ray-job.batch-inference.yaml @@ -77,10 +77,10 @@ data: def __call__(self, batch: Dict[str, np.ndarray]): # Convert the numpy array of images into a list of PIL images which is the format the HF pipeline expects. 
outputs = self.classifier( - [Image.fromarray(image_array) for image_array in batch["image"]], - top_k=1, + [Image.fromarray(image_array) for image_array in batch["image"]], + top_k=1, batch_size=BATCH_SIZE) - + # `outputs` is a list of length-one lists. For example: # [[{'score': '...', 'label': '...'}], ..., [{'score': '...', 'label': '...'}]] batch["score"] = [output[0]["score"] for output in outputs] diff --git a/ray-operator/config/samples/ray-job.custom-head-svc.yaml b/ray-operator/config/samples/ray-job.custom-head-svc.yaml index 8364387781b..0bca113770a 100644 --- a/ray-operator/config/samples/ray-job.custom-head-svc.yaml +++ b/ray-operator/config/samples/ray-job.custom-head-svc.yaml @@ -16,7 +16,7 @@ spec: - requests==2.26.0 - pendulum==2.1.2 env_vars: - counter_name: "test_counter" + counter_name: "test_counter" # rayClusterSpec specifies the RayCluster instance to be created by the RayJob controller. rayClusterSpec: @@ -76,10 +76,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 
'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: [ "/bin/sh","-c","ray stop" ] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-job.modin.yaml b/ray-operator/config/samples/ray-job.modin.yaml new file mode 100644 index 00000000000..cbb11caf99e --- /dev/null +++ b/ray-operator/config/samples/ray-job.modin.yaml @@ -0,0 +1,72 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: rayjob-sample +spec: + entrypoint: python /home/ray/samples/sample_code.py + runtimeEnvYAML: | + pip: + - modin[all]==0.31.0 + rayClusterSpec: + rayVersion: "2.31.0" + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.31.0 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "1" + requests: + cpu: "200m" + volumeMounts: + - mountPath: /home/ray/samples + name: code-sample + volumes: + - name: code-sample + configMap: + name: ray-job-code-sample + items: + - key: sample_code.py + path: sample_code.py +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ray-job-code-sample +data: + sample_code.py: | + import time + import urllib.request + import ray + import numpy as np + import modin.pandas as pd + import modin.config as modin_cfg + from modin.config import BenchmarkMode + + ray.init() + + print("Modin Engine:", modin_cfg.Engine.get()) + BenchmarkMode.put(True) + + url_path = "https://modin-datasets.intel.com/green-taxi/green_tripdata_2015-01.csv" + df = pd.read_csv(url_path, parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"], quoting=3) + t0 = time.perf_counter() + + isnull = df.isnull() + t1 = time.perf_counter() + + rounded_trip_distance = df[["pickup_longitude"]].applymap(round) + t2 = time.perf_counter() + + print("Time to compute isnull:", t1 - t0) + print("Time to compute rounded_trip_distance:", t2 - t1) diff --git 
a/ray-operator/config/samples/ray-job.resources.yaml b/ray-operator/config/samples/ray-job.resources.yaml index 7b21e081d09..79111294992 100644 --- a/ray-operator/config/samples/ray-job.resources.yaml +++ b/ray-operator/config/samples/ray-job.resources.yaml @@ -9,10 +9,10 @@ spec: # ttlSecondsAfterFinished specifies the number of seconds after which the RayCluster will be deleted after the RayJob finishes. # ttlSecondsAfterFinished: 10 - + # RuntimeEnvYAML represents the runtime environment configuration provided as a multi-line YAML string. # See https://docs.ray.io/en/latest/ray-core/handling-dependencies.html for details. - # (New in KubeRay version 1.0.) + # (New in KubeRay version 1.0.) runtimeEnvYAML: | pip: - requests==2.26.0 @@ -41,7 +41,7 @@ spec: num-cpus: "3" num-gpus: "2" resources: '"{\"resource1\": 2, \"resource2\": 3}"' - + #pod template template: spec: @@ -88,10 +88,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' image: rayproject/ray:2.5.0 - lifecycle: - preStop: - exec: - command: [ "/bin/sh","-c","ray stop" ] resources: requests: cpu: "200m" @@ -106,7 +102,7 @@ spec: # # If Command is not specified, the correct command will be supplied at runtime using the RayJob spec `entrypoint` field. # # Specifying Command is not recommended. 
# # command: ["ray job submit --address=http://rayjob-sample-raycluster-v6qcq-head-svc.default.svc.cluster.local:8265 -- echo hello world"] - + ######################Ray code sample################################# # this sample is from https://docs.ray.io/en/latest/cluster/job-submission.html#quick-start-example diff --git a/ray-operator/config/samples/ray-job.sample.yaml b/ray-operator/config/samples/ray-job.sample.yaml index 96af157ef86..3b7143fd872 100644 --- a/ray-operator/config/samples/ray-job.sample.yaml +++ b/ray-operator/config/samples/ray-job.sample.yaml @@ -91,10 +91,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: [ "/bin/sh","-c","ray stop" ] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-job.shutdown.yaml b/ray-operator/config/samples/ray-job.shutdown.yaml index 1b8ccf71dfb..b4b40b270db 100644 --- a/ray-operator/config/samples/ray-job.shutdown.yaml +++ b/ray-operator/config/samples/ray-job.shutdown.yaml @@ -84,10 +84,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 
'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: [ "/bin/sh","-c","ray stop" ] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-job.tpu-v6e-16-multihost.yaml b/ray-operator/config/samples/ray-job.tpu-v6e-16-multihost.yaml new file mode 100644 index 00000000000..129cd9d6953 --- /dev/null +++ b/ray-operator/config/samples/ray-job.tpu-v6e-16-multihost.yaml @@ -0,0 +1,76 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: v6e-16-job +spec: + entrypoint: python ray-operator/config/samples/tpu/tpu_list_devices.py + runtimeEnvYAML: | + working_dir: "https://github.com/ray-project/kuberay/archive/master.zip" + pip: + - jax[tpu]==0.4.33 + - -f https://storage.googleapis.com/jax-releases/libtpu_releases.html + rayClusterSpec: + rayVersion: '2.37.0' + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.37.0-py310 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8888 + name: grpc + resources: + limits: + cpu: "8" + memory: 40G + requests: + cpu: "8" + memory: 40G + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 1 + numOfHosts: 4 + groupName: tpu-group + rayStartParams: {} + template: + spec: + securityContext: + runAsUser: 0 + containers: + - name: ray-worker + image: rayproject/ray:2.37.0-py310 + resources: + limits: + cpu: "24" + google.com/tpu: "4" + memory: 200G + requests: + cpu: "24" + google.com/tpu: "4" + memory: 200G + env: + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: VBAR_CONTROL_SERVICE_URL + value: $(NODE_IP):8353 + - name: JAX_PLATFORMS + value: tpu,cpu + - name: ENABLE_PJRT_COMPATIBILITY + value: "true" + ports: + - containerPort: 8081 + name: mxla + nodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v6e-slice + cloud.google.com/gke-tpu-topology: 4x4 diff --git 
a/ray-operator/config/samples/ray-job.tpu-v6e-256-multihost.yaml b/ray-operator/config/samples/ray-job.tpu-v6e-256-multihost.yaml new file mode 100644 index 00000000000..09e45f735ad --- /dev/null +++ b/ray-operator/config/samples/ray-job.tpu-v6e-256-multihost.yaml @@ -0,0 +1,64 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: v6e-256-job +spec: + entrypoint: python ray-operator/config/samples/tpu/tpu_list_devices.py + runtimeEnvYAML: | + working_dir: "https://github.com/ray-project/kuberay/archive/master.zip" + pip: + - jax[tpu]==0.4.33 + - -f https://storage.googleapis.com/jax-releases/libtpu_releases.html + rayClusterSpec: + rayVersion: '2.37.0' + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.37.0-py310 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8888 + name: grpc + resources: + limits: + cpu: "8" + memory: 40G + requests: + cpu: "8" + memory: 40G + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 1 + numOfHosts: 64 + groupName: tpu-group + rayStartParams: + resources: '"{\"TPU\": 4}"' + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.37.0-py310 + resources: + limits: + cpu: "24" + google.com/tpu: "4" + memory: 200G + requests: + cpu: "24" + google.com/tpu: "4" + memory: 200G + env: + - name: JAX_PLATFORMS + value: tpu,cpu + nodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v6e-slice + cloud.google.com/gke-tpu-topology: 16x16 diff --git a/ray-operator/config/samples/ray-job.tpu-v6e-singlehost.yaml b/ray-operator/config/samples/ray-job.tpu-v6e-singlehost.yaml new file mode 100644 index 00000000000..6a4e5c76ca7 --- /dev/null +++ b/ray-operator/config/samples/ray-job.tpu-v6e-singlehost.yaml @@ -0,0 +1,60 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: v6e-4-job +spec: + entrypoint: python 
ray-operator/config/samples/tpu/tpu_list_devices.py + runtimeEnvYAML: | + working_dir: "https://github.com/ray-project/kuberay/archive/master.zip" + pip: + - jax[tpu]==0.4.33 + - -f https://storage.googleapis.com/jax-releases/libtpu_releases.html + rayClusterSpec: + rayVersion: '2.37.0' + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.37.0-py310 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: "8" + memory: 40G + requests: + cpu: "8" + memory: 40G + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 1 + numOfHosts: 1 + groupName: tpu-group + rayStartParams: {} + template: + spec: + securityContext: + runAsUser: 0 + containers: + - name: ray-worker + image: rayproject/ray:2.37.0-py310 + resources: + limits: + cpu: "24" + google.com/tpu: "4" + memory: 200G + requests: + cpu: "24" + google.com/tpu: "4" + memory: 200G + nodeSelector: + cloud.google.com/gke-tpu-accelerator: tpu-v6e-slice + cloud.google.com/gke-tpu-topology: 2x2 diff --git a/ray-operator/config/samples/ray-job.use-existing-raycluster.yaml b/ray-operator/config/samples/ray-job.use-existing-raycluster.yaml new file mode 100644 index 00000000000..96154ff897b --- /dev/null +++ b/ray-operator/config/samples/ray-job.use-existing-raycluster.yaml @@ -0,0 +1,9 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + name: rayjob-use-existing-raycluster +spec: + entrypoint: python -c "import ray; ray.init(); print(ray.cluster_resources())" + # Select an existing RayCluster called "ray-cluster-kuberay" instead of creating a new one. 
+ clusterSelector: + ray.io/cluster: ray-cluster-kuberay diff --git a/ray-operator/config/samples/ray-service.autoscaler.yaml b/ray-operator/config/samples/ray-service.autoscaler.yaml index 3d3aee5d20e..6869dec7141 100644 --- a/ray-operator/config/samples/ray-service.autoscaler.yaml +++ b/ray-operator/config/samples/ray-service.autoscaler.yaml @@ -97,10 +97,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-service.custom-serve-service.yaml b/ray-operator/config/samples/ray-service.custom-serve-service.yaml index e0166864bc2..bba80079c5f 100644 --- a/ray-operator/config/samples/ray-service.custom-serve-service.yaml +++ b/ray-operator/config/samples/ray-service.custom-serve-service.yaml @@ -1,5 +1,5 @@ # This example shows how to configure a custom Kubernetes service for Ray Serve -# by specifying the `serveService` field in the RayService CRD. +# by specifying the `serveService` field in the RayService CRD. apiVersion: ray.io/v1 kind: RayService metadata: @@ -96,10 +96,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 
'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-service.different-port.yaml b/ray-operator/config/samples/ray-service.different-port.yaml index 340b9b3a8da..dd6b344a92d 100644 --- a/ray-operator/config/samples/ray-service.different-port.yaml +++ b/ray-operator/config/samples/ray-service.different-port.yaml @@ -83,10 +83,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-service.gke-http-gateway-ingress.yaml b/ray-operator/config/samples/ray-service.gke-http-gateway-ingress.yaml new file mode 100644 index 00000000000..c3e422cbb91 --- /dev/null +++ b/ray-operator/config/samples/ray-service.gke-http-gateway-ingress.yaml @@ -0,0 +1,84 @@ +# Kubernetes Gateway API - HTTPRoute Example +# This example usage Kubernetes Gateway API to create a HTTPRoute to RayService Serve Endpoint. 
+## Gateway API for Ingress +# https://gateway-api.sigs.k8s.io/#gateway-api-for-ingress +# +## GKE GatewayAPI and GatewayClass +# For more information on GatewayClasses on Google Kubernetes Engine, see: +# - https://cloud.google.com/kubernetes-engine/docs/concepts/gateway-api +# - https://cloud.google.com/kubernetes-engine/docs/how-to/gatewayclass-capabilities +## Enable Gateway API +# https://cloud.google.com/kubernetes-engine/docs/how-to/deploying-gateways#enable-gateway +# +# To test this out: +# curl --location --request POST 'http://ai.example.com/v1/chat/completions' \ +# --header 'Content-Type: application/json' \ +#--data-raw '{ +# "model": "tiiuae/falcon-7b-instruct", +# "messages": [ +# {"role": "system", "content": "You are a helpful assistant."}, +# {"role": "user", "content": "What are the top 5 most popular Indian dishes? Please be brief."} +# ] +#}' +# +--- +kind: Gateway +apiVersion: gateway.networking.k8s.io/v1beta1 +metadata: + name: ray-service-external-http +spec: + gatewayClassName: gke-l7-global-external-managed + listeners: + - name: http + protocol: HTTP + port: 80 + allowedRoutes: + namespaces: + from: Same + +--- +kind: HTTPRoute +apiVersion: gateway.networking.k8s.io/v1beta1 +metadata: + name: ray-service-serve-endpoint +spec: + hostnames: + - "ai.example.com" + parentRefs: + - kind: Gateway + name: ray-service-external-http + rules: + - + matches: + - path: + type: PathPrefix + value: "/v1/chat/completions" + - path: + type: PathPrefix + value: "/v1/completions" + backendRefs: + - name: ray-service-serve-svc + port: 8000 + +--- +apiVersion: networking.gke.io/v1 +kind: HealthCheckPolicy +metadata: + name: endpoint-healthcheck +spec: + default: + checkIntervalSec: 10 + timeoutSec: 10 + healthyThreshold: 3 + unhealthyThreshold: 3 + logConfig: + enabled: true + config: + type: HTTP + httpHealthCheck: + port: 8000 + requestPath: "/-/routes" + targetRef: + group: "" + kind: Service + name: ray-service-serve-svc diff --git 
a/ray-operator/config/samples/ray-service.gke-model-configmap.yaml b/ray-operator/config/samples/ray-service.gke-model-configmap.yaml new file mode 100644 index 00000000000..43558bd7748 --- /dev/null +++ b/ray-operator/config/samples/ray-service.gke-model-configmap.yaml @@ -0,0 +1,150 @@ +apiVersion: ray.io/v1 +kind: RayService +metadata: + name: ray-service +spec: + # More details here on some of the fields: https://docs.ray.io/en/latest/serve/production-guide/kubernetes.html#setting-up-a-rayservice-custom-resource-cr + serviceUnhealthySecondThreshold: 720 + deploymentUnhealthySecondThreshold: 720 + serveConfigV2: | + applications: + - name: rayllm-serve + route_prefix: / + import_path: rayllm.backend:router_application + args: + models: + - "./models/tiiuae/falcon-7b-instruct.yaml" + rayClusterConfig: + headGroupSpec: + serviceType: NodePort + rayStartParams: + resources: '"{\"accelerator_type_cpu\": 2}"' + dashboard-host: '0.0.0.0' + block: 'true' + template: + spec: + containers: + - name: ray-head + image: anyscale/ray-llm:0.5.0 + resources: + limits: + cpu: "2" + memory: "8Gi" + requests: + cpu: "2" + memory: "8Gi" + volumeMounts: + - mountPath: /home/ray/models + name: model + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + volumes: + - name: model + configMap: + name: falcon-7b-instruct-config + items: + - key: falcon-7b-instruct.yaml + path: tiiuae/falcon-7b-instruct.yaml + workerGroupSpecs: + # Worker with Scale to 0 configuration + - replicas: 1 + minReplicas: 1 + maxReplicas: 4 + groupName: gpu-worker-group + rayStartParams: + block: 'true' + resources: '"{\"accelerator_type_cpu\": 20, \"accelerator_type_l4\": 1}"' + # pod template + template: + spec: + containers: + - name: llm + image: anyscale/ray-llm:0.5.0 + resources: + limits: + cpu: "20" + memory: "64Gi" + nvidia.com/gpu: "2" + requests: + cpu: "20" + memory: "64Gi" + 
nvidia.com/gpu: "2" + # Here we are loading the model from the configmap with the name falcon-7b-instruct-config + volumeMounts: + - mountPath: /home/ray/models + name: model + # Please ensure the following taint has been applied to the GPU node in the cluster. + tolerations: + - key: "ray.io/node-type" + operator: "Equal" + value: "worker" + effect: "NoSchedule" + - key: "nvidia.com/gpu" + operator: "Equal" + value: "present" + effect: "NoSchedule" + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-l4 + # cloud.google.com/gke-accelerator: nvidia-tesla-t4 # to use: nvidia-tesla-t4 GPUs + volumes: + - name: model + configMap: + name: falcon-7b-instruct-config + items: + - key: falcon-7b-instruct.yaml + path: tiiuae/falcon-7b-instruct.yaml +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: falcon-7b-instruct-config +data: + falcon-7b-instruct.yaml: | + deployment_config: + autoscaling_config: + min_replicas: 1 + initial_replicas: 1 + max_replicas: 4 + target_num_ongoing_requests_per_replica: 24 + metrics_interval_s: 10.0 + look_back_period_s: 60.0 + smoothing_factor: 0.5 + downscale_delay_s: 600.0 + upscale_delay_s: 30.0 + max_concurrent_queries: 24 + ray_actor_options: + resources: + accelerator_type_l4: 0.1 # use: accelerator_type_t4 for T4 GPUs + engine_config: + model_id: tiiuae/falcon-7b-instruct + hf_model_id: tiiuae/falcon-7b-instruct + type: VLLMEngine + engine_kwargs: + trust_remote_code: false + max_num_batched_tokens: 2048 + max_num_seqs: 64 + gpu_memory_utilization: 0.9 + max_total_tokens: 2048 + generation: + prompt_format: + system: "{instruction}\n\n" + assistant: " {instruction} " + trailing_assistant: "" + user: "[INST] {system}{instruction}" + system_in_user: true + default_system_message: "" + stopping_sequences: [""] + scaling_config: + num_workers: 1 + num_gpus_per_worker: 1 + num_cpus_per_worker: 3 + placement_strategy: "PACK" + resources_per_worker: + accelerator_type_l4: 0.1 # use: accelerator_type_t4 for T4 GPUs diff --git 
a/ray-operator/config/samples/ray-service.high-availability-locust.yaml b/ray-operator/config/samples/ray-service.high-availability-locust.yaml index 5bb6fa865f9..ace6f8319ba 100644 --- a/ray-operator/config/samples/ray-service.high-availability-locust.yaml +++ b/ray-operator/config/samples/ray-service.high-availability-locust.yaml @@ -20,7 +20,7 @@ spec: min_replicas: 0 max_replicas: 5 upscale_delay_s: 3 - downscale_delay_s: 60 + downscale_delay_s: 60 metrics_interval_s: 2 look_back_period_s: 10 max_replicas_per_node: 1 @@ -32,7 +32,7 @@ spec: autoscalerOptions: idleTimeoutSeconds: 1 ######################headGroupSpecs################################# - # Ray head pod template. + # Ray head pod template. headGroupSpec: # The `rayStartParams` are used to configure the `ray start` command. # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. @@ -40,7 +40,7 @@ spec: rayStartParams: num-cpus: "0" dashboard-host: '0.0.0.0' - #pod template + #pod template template: spec: containers: @@ -78,10 +78,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 
'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: 1 @@ -138,7 +134,7 @@ data: import os class ConstantUser(FastHttpUser): - wait_time = constant(float(os.environ.get("LOCUS_WAIT_TIME", "1"))) + wait_time = constant(float(os.environ.get("LOCUST_WAIT_TIME", "1"))) network_timeout = None connection_timeout = None @task diff --git a/ray-operator/config/samples/ray-service.high-availability.yaml b/ray-operator/config/samples/ray-service.high-availability.yaml index a3d4cffbf30..00924bc8cd7 100644 --- a/ray-operator/config/samples/ray-service.high-availability.yaml +++ b/ray-operator/config/samples/ray-service.high-availability.yaml @@ -1,76 +1,3 @@ -kind: ConfigMap -apiVersion: v1 -metadata: - name: redis-config - labels: - app: redis -data: - redis.conf: |- - dir /data - port 6379 - bind 0.0.0.0 - appendonly yes - protected-mode no - requirepass 5241590000000000 - pidfile /data/redis-6379.pid ---- -apiVersion: v1 -kind: Service -metadata: - name: redis - labels: - app: redis -spec: - type: ClusterIP - ports: - - name: redis - port: 6379 - selector: - app: redis ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: redis - labels: - app: redis -spec: - replicas: 1 - selector: - matchLabels: - app: redis - template: - metadata: - labels: - app: redis - spec: - containers: - - name: redis - image: redis:5.0.8 - command: - - "sh" - - "-c" - - "redis-server /usr/local/etc/redis/redis.conf" - ports: - - containerPort: 6379 - volumeMounts: - - name: config - mountPath: /usr/local/etc/redis/redis.conf - subPath: redis.conf - volumes: - - name: config - configMap: - name: redis-config ---- -# Redis password -apiVersion: v1 -kind: Secret -metadata: - name: redis-password-secret -type: Opaque -data: - # echo -n "5241590000000000" | base64 - password: NTI0MTU5MDAwMDAwMDAwMA== --- apiVersion: ray.io/v1 kind: RayService @@ -85,7 +12,7 @@ spec: import_path: fruit.deployment_graph 
route_prefix: /fruit runtime_env: - working_dir: "https://github.com/ray-project/test_dag/archive/41d09119cbdf8450599f993f51318e9e27c59098.zip" + working_dir: "https://github.com/ray-project/test_dag/archive/4d2c9a59d9eabfd4c8a9e04a7aae44fc8f5b416f.zip" deployments: - name: MangoStand num_replicas: 2 @@ -113,11 +40,6 @@ spec: max_replicas_per_node: 1 ray_actor_options: num_cpus: 0.1 - - name: DAGDriver - num_replicas: 2 - max_replicas_per_node: 1 - ray_actor_options: - num_cpus: 0.1 rayClusterConfig: rayVersion: '2.9.0' # should match the Ray version in the image of the containers ######################headGroupSpecs################################# @@ -144,10 +66,10 @@ spec: resources: limits: cpu: 1 - memory: 1Gi + memory: 2Gi requests: cpu: 1 - memory: 1Gi + memory: 2Gi ports: - containerPort: 6379 name: gcs-server @@ -174,10 +96,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 
'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: 1 @@ -230,3 +148,77 @@ spec: items: - key: query.py path: query.py +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: redis-config + labels: + app: redis +data: + redis.conf: |- + dir /data + port 6379 + bind 0.0.0.0 + appendonly yes + protected-mode no + requirepass 5241590000000000 + pidfile /data/redis-6379.pid +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + type: ClusterIP + ports: + - name: redis + port: 6379 + selector: + app: redis +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + app: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis:5.0.8 + command: + - "sh" + - "-c" + - "redis-server /usr/local/etc/redis/redis.conf" + ports: + - containerPort: 6379 + volumeMounts: + - name: config + mountPath: /usr/local/etc/redis/redis.conf + subPath: redis.conf + volumes: + - name: config + configMap: + name: redis-config +--- +# Redis password +apiVersion: v1 +kind: Secret +metadata: + name: redis-password-secret +type: Opaque +data: + # echo -n "5241590000000000" | base64 + password: NTI0MTU5MDAwMDAwMDAwMA== diff --git a/ray-operator/config/samples/ray-service.mobilenet.yaml b/ray-operator/config/samples/ray-service.mobilenet.yaml index 826b03f3ba7..6585569ad9c 100644 --- a/ray-operator/config/samples/ray-service.mobilenet.yaml +++ b/ray-operator/config/samples/ray-service.mobilenet.yaml @@ -64,10 +64,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 
'my-name', or '123-abc' image: rayproject/ray-ml:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: 1 diff --git a/ray-operator/config/samples/ray-service.sample.yaml b/ray-operator/config/samples/ray-service.sample.yaml index a61bfc0f8eb..88393acd32a 100644 --- a/ray-operator/config/samples/ray-service.sample.yaml +++ b/ray-operator/config/samples/ray-service.sample.yaml @@ -108,10 +108,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-service.text-ml.yaml b/ray-operator/config/samples/ray-service.text-ml.yaml index 8b04d16398b..a4201137290 100644 --- a/ray-operator/config/samples/ray-service.text-ml.yaml +++ b/ray-operator/config/samples/ray-service.text-ml.yaml @@ -78,10 +78,6 @@ spec: containers: - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 
'my-name', or '123-abc' image: rayproject/ray:2.9.0 - lifecycle: - preStop: - exec: - command: ["/bin/sh","-c","ray stop"] resources: limits: cpu: "1" diff --git a/ray-operator/config/samples/ray-service.tpu-single-host.yaml b/ray-operator/config/samples/ray-service.tpu-single-host.yaml new file mode 100644 index 00000000000..d7f441c2937 --- /dev/null +++ b/ray-operator/config/samples/ray-service.tpu-single-host.yaml @@ -0,0 +1,83 @@ +apiVersion: ray.io/v1 +kind: RayService +metadata: + name: stable-diffusion-tpu +spec: + serveConfigV2: | + applications: + - name: stable_diffusion + import_path: stable_diffusion.stable_diffusion_tpu:deployment + runtime_env: + working_dir: "https://github.com/ray-project/serve_config_examples/archive/refs/heads/master.zip" + pip: + - pydantic<2 + - google-api-python-client + - pillow + - diffusers==0.7.2 + - transformers==4.24.0 + - flax + - ml_dtypes==0.2.0 + - jax[tpu]==0.4.11 + - -f https://storage.googleapis.com/jax-releases/libtpu_releases.html + - fastapi + rayClusterConfig: + rayVersion: '2.9.0' # Should match the Ray version in the image of the containers + ######################headGroupSpecs################################# + # Ray head pod template. + headGroupSpec: + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. 
+ rayStartParams: {} + # Pod template + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0-py310 + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + resources: + limits: + cpu: "2" + memory: "8G" + requests: + cpu: "2" + memory: "8G" + workerGroupSpecs: + # The pod replicas in this group typed worker + - replicas: 1 + minReplicas: 1 + maxReplicas: 10 + numOfHosts: 1 + groupName: tpu-group + rayStartParams: {} + # Pod template + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.9.0-py310 + resources: + limits: + # ct4p-hightpu-4t (v4) TPUs have 240 vCPUs, adjust this value based on your resource needs + cpu: "100" + ephemeral-storage: 20Gi + google.com/tpu: "4" + memory: 200G + requests: + cpu: "100" + ephemeral-storage: 20Gi + google.com/tpu: "4" + memory: 200G + nodeSelector: + # https://cloud.google.com/kubernetes-engine/docs/concepts/tpus + cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice + cloud.google.com/gke-tpu-topology: 2x2x1 diff --git a/ray-operator/config/samples/tpu/tpu_list_devices.py b/ray-operator/config/samples/tpu/tpu_list_devices.py new file mode 100644 index 00000000000..ea65f2df459 --- /dev/null +++ b/ray-operator/config/samples/tpu/tpu_list_devices.py @@ -0,0 +1,20 @@ +import os +import ray +import jax +import time + +from jax.experimental import multihost_utils + +ray.init() + +@ray.remote(resources={"TPU": 4}) +def tpu_cores(): + multihost_utils.sync_global_devices("sync") + cores = "TPU cores:" + str(jax.device_count()) + print("TPU Worker: " + os.environ.get("TPU_WORKER_ID")) + return cores + +num_workers = int(ray.available_resources()["TPU"]) // 4 +print(f"Number of TPU Workers: {num_workers}") +result = [tpu_cores.remote() for _ in range(num_workers)] +print(ray.get(result)) diff --git a/ray-operator/config/samples/vllm/ray-service.vllm.yaml 
b/ray-operator/config/samples/vllm/ray-service.vllm.yaml new file mode 100644 index 00000000000..1efdfba85d7 --- /dev/null +++ b/ray-operator/config/samples/vllm/ray-service.vllm.yaml @@ -0,0 +1,85 @@ +apiVersion: ray.io/v1 +kind: RayService +metadata: + name: llama-3-8b +spec: + serveConfigV2: | + applications: + - name: llm + route_prefix: / + import_path: ray-operator.config.samples.vllm.serve:model + deployments: + - name: VLLMDeployment + num_replicas: 1 + ray_actor_options: + num_cpus: 8 + # NOTE: num_gpus is set automatically based on TENSOR_PARALLELISM + runtime_env: + working_dir: "https://github.com/ray-project/kuberay/archive/master.zip" + pip: ["vllm==0.5.4"] + env_vars: + MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct" + TENSOR_PARALLELISM: "2" + PIPELINE_PARALLELISM: "1" + rayClusterConfig: + headGroupSpec: + rayStartParams: + dashboard-host: '0.0.0.0' + template: + spec: + containers: + - name: ray-head + image: rayproject/ray-ml:2.33.0.914af0-py311 + resources: + limits: + cpu: "2" + memory: "8Gi" + requests: + cpu: "2" + memory: "8Gi" + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + env: + - name: HUGGING_FACE_HUB_TOKEN + valueFrom: + secretKeyRef: + name: hf-secret + key: hf_api_token + workerGroupSpecs: + - replicas: 1 + minReplicas: 0 + maxReplicas: 4 + groupName: gpu-group + rayStartParams: {} + template: + spec: + containers: + - name: llm + image: rayproject/ray-ml:2.33.0.914af0-py311 + env: + - name: HUGGING_FACE_HUB_TOKEN + valueFrom: + secretKeyRef: + name: hf-secret + key: hf_api_token + resources: + limits: + cpu: "8" + memory: "20Gi" + nvidia.com/gpu: "2" + requests: + cpu: "8" + memory: "20Gi" + nvidia.com/gpu: "2" + # Please add the following taints to the GPU node. 
+ tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" diff --git a/ray-operator/config/samples/vllm/serve.py b/ray-operator/config/samples/vllm/serve.py new file mode 100644 index 00000000000..353cff255b3 --- /dev/null +++ b/ray-operator/config/samples/vllm/serve.py @@ -0,0 +1,125 @@ +import os + +from typing import Dict, Optional, List +import logging + +from fastapi import FastAPI +from starlette.requests import Request +from starlette.responses import StreamingResponse, JSONResponse + +from ray import serve + +from vllm.engine.arg_utils import AsyncEngineArgs +from vllm.engine.async_llm_engine import AsyncLLMEngine +from vllm.entrypoints.openai.cli_args import make_arg_parser +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, + ChatCompletionResponse, + ErrorResponse, +) +from vllm.entrypoints.openai.serving_chat import OpenAIServingChat +from vllm.entrypoints.openai.serving_engine import LoRAModulePath +from vllm.utils import FlexibleArgumentParser + +logger = logging.getLogger("ray.serve") + +app = FastAPI() + + +@serve.deployment(name="VLLMDeployment") +@serve.ingress(app) +class VLLMDeployment: + def __init__( + self, + engine_args: AsyncEngineArgs, + response_role: str, + lora_modules: Optional[List[LoRAModulePath]] = None, + chat_template: Optional[str] = None, + ): + logger.info(f"Starting with engine args: {engine_args}") + self.openai_serving_chat = None + self.engine_args = engine_args + self.response_role = response_role + self.lora_modules = lora_modules + self.chat_template = chat_template + self.engine = AsyncLLMEngine.from_engine_args(engine_args) + + @app.post("/v1/chat/completions") + async def create_chat_completion( + self, request: ChatCompletionRequest, raw_request: Request + ): + """OpenAI-compatible HTTP endpoint. 
+ + API reference: + - https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html + """ + if not self.openai_serving_chat: + model_config = await self.engine.get_model_config() + # Determine the name of the served model for the OpenAI client. + if self.engine_args.served_model_name is not None: + served_model_names = self.engine_args.served_model_name + else: + served_model_names = [self.engine_args.model] + self.openai_serving_chat = OpenAIServingChat( + self.engine, + model_config, + served_model_names=served_model_names, + response_role=self.response_role, + lora_modules=self.lora_modules, + chat_template=self.chat_template, + prompt_adapters=None, + request_logger=None, + ) + logger.info(f"Request: {request}") + generator = await self.openai_serving_chat.create_chat_completion( + request, raw_request + ) + if isinstance(generator, ErrorResponse): + return JSONResponse( + content=generator.model_dump(), status_code=generator.code + ) + if request.stream: + return StreamingResponse(content=generator, media_type="text/event-stream") + else: + assert isinstance(generator, ChatCompletionResponse) + return JSONResponse(content=generator.model_dump()) + + +def parse_vllm_args(cli_args: Dict[str, str]): + """Parses vLLM args based on CLI inputs. + + Currently uses argparse because vLLM doesn't expose Python models for all of the + config options we want to support. + """ + parser = FlexibleArgumentParser(description="vLLM CLI") + parser = make_arg_parser(parser) + arg_strings = [] + for key, value in cli_args.items(): + arg_strings.extend([f"--{key}", str(value)]) + logger.info(arg_strings) + parsed_args = parser.parse_args(args=arg_strings) + return parsed_args + + +def build_app(cli_args: Dict[str, str]) -> serve.Application: + """Builds the Serve app based on CLI arguments. + + See https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server + for the complete set of arguments. 
+ + Supported engine arguments: https://docs.vllm.ai/en/latest/models/engine_args.html. + """ # noqa: E501 + parsed_args = parse_vllm_args(cli_args) + engine_args = AsyncEngineArgs.from_cli_args(parsed_args) + engine_args.worker_use_ray = True + + return VLLMDeployment.bind( + engine_args, + parsed_args.response_role, + parsed_args.lora_modules, + parsed_args.chat_template, + ) + + +model = build_app( + {"model": os.environ['MODEL_ID'], "tensor-parallel-size": os.environ['TENSOR_PARALLELISM'], "pipeline-parallel-size": os.environ['PIPELINE_PARALLELISM']}) diff --git a/ray-operator/config/security/kind-config.yaml b/ray-operator/config/security/kind-config.yaml index 7422b24139d..2759f67dd63 100644 --- a/ray-operator/config/security/kind-config.yaml +++ b/ray-operator/config/security/kind-config.yaml @@ -27,4 +27,3 @@ nodes: - hostPath: ray-operator/config/security/audit-policy.yaml containerPath: /etc/kubernetes/policies/audit-policy.yaml readOnly: true - diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index 8e773f9d78f..1427733848e 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -77,7 +77,7 @@ spec: # workersToDelete: # - raycluster-complete-worker-large-group-bdtwh # - raycluster-complete-worker-large-group-hv457 - # - raycluster-complete-worker-large-group-k8tj7 + # - raycluster-complete-worker-large-group-k8tj7 # the following params are used to complete the ray start: ray start --block rayStartParams: {} #pod template diff --git a/ray-operator/config/webhook/kustomization.yaml b/ray-operator/config/webhook/kustomization.yaml index 9750d9630df..fa537c84c34 100644 --- a/ray-operator/config/webhook/kustomization.yaml +++ b/ray-operator/config/webhook/kustomization.yaml @@ -16,3 +16,5 @@ patches: kind: ValidatingWebhookConfiguration name: validating-webhook-configuration version: v1 
+apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization diff --git a/ray-operator/controllers/ray/batchscheduler/interface/interface.go b/ray-operator/controllers/ray/batchscheduler/interface/interface.go index 6f991edc885..2e9bafa4be7 100644 --- a/ray-operator/controllers/ray/batchscheduler/interface/interface.go +++ b/ray-operator/controllers/ray/batchscheduler/interface/interface.go @@ -3,11 +3,12 @@ package schedulerinterface import ( "context" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/builder" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) // BatchScheduler manages submitting RayCluster pods to a third-party scheduler. @@ -22,7 +23,7 @@ type BatchScheduler interface { // AddMetadataToPod enriches Pod specs with metadata necessary to tie them to the scheduler. // For example, setting labels for queues / priority, and setting schedulerName. 
- AddMetadataToPod(app *rayv1.RayCluster, groupName string, pod *corev1.Pod) + AddMetadataToPod(ctx context.Context, app *rayv1.RayCluster, groupName string, pod *corev1.Pod) } // BatchSchedulerFactory handles initial setup of the scheduler plugin by registering the @@ -51,18 +52,18 @@ func (d *DefaultBatchScheduler) Name() string { return GetDefaultPluginName() } -func (d *DefaultBatchScheduler) DoBatchSchedulingOnSubmission(ctx context.Context, app *rayv1.RayCluster) error { +func (d *DefaultBatchScheduler) DoBatchSchedulingOnSubmission(_ context.Context, _ *rayv1.RayCluster) error { return nil } -func (d *DefaultBatchScheduler) AddMetadataToPod(app *rayv1.RayCluster, groupName string, pod *corev1.Pod) { +func (d *DefaultBatchScheduler) AddMetadataToPod(_ context.Context, _ *rayv1.RayCluster, _ string, _ *corev1.Pod) { } -func (df *DefaultBatchSchedulerFactory) New(config *rest.Config) (BatchScheduler, error) { +func (df *DefaultBatchSchedulerFactory) New(_ *rest.Config) (BatchScheduler, error) { return &DefaultBatchScheduler{}, nil } -func (df *DefaultBatchSchedulerFactory) AddToScheme(scheme *runtime.Scheme) { +func (df *DefaultBatchSchedulerFactory) AddToScheme(_ *runtime.Scheme) { } func (df *DefaultBatchSchedulerFactory) ConfigureReconciler(b *builder.Builder) *builder.Builder { diff --git a/ray-operator/controllers/ray/batchscheduler/schedulermanager.go b/ray-operator/controllers/ray/batchscheduler/schedulermanager.go index bdc58b9df17..8501949b7ec 100644 --- a/ray-operator/controllers/ray/batchscheduler/schedulermanager.go +++ b/ray-operator/controllers/ray/batchscheduler/schedulermanager.go @@ -5,85 +5,87 @@ import ( "sync" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/builder" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface" + configapi 
"github.com/ray-project/kuberay/ray-operator/apis/config/v1alpha1" "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/yunikorn" + + "k8s.io/client-go/rest" + + schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface" ) -var schedulerContainers = map[string]schedulerinterface.BatchSchedulerFactory{ - schedulerinterface.GetDefaultPluginName(): &schedulerinterface.DefaultBatchSchedulerFactory{}, - volcano.GetPluginName(): &volcano.VolcanoBatchSchedulerFactory{}, +type SchedulerManager struct { + config *rest.Config + factory schedulerinterface.BatchSchedulerFactory + scheduler schedulerinterface.BatchScheduler + rayConfigs configapi.Configuration + sync.Mutex } -func GetRegisteredNames() []string { - var pluginNames []string - for key := range schedulerContainers { - pluginNames = append(pluginNames, key) +// NewSchedulerManager maintains a specific scheduler plugin based on config +func NewSchedulerManager(rayConfigs configapi.Configuration, config *rest.Config) (*SchedulerManager, error) { + // init the scheduler factory from config + factory, err := getSchedulerFactory(rayConfigs) + if err != nil { + return nil, err } - return pluginNames -} -func ConfigureReconciler(b *builder.Builder) *builder.Builder { - for _, factory := range schedulerContainers { - b = factory.ConfigureReconciler(b) + scheduler, err := factory.New(config) + if err != nil { + return nil, err } - return b -} -func AddToScheme(scheme *runtime.Scheme) { - for _, factory := range schedulerContainers { - factory.AddToScheme(scheme) + manager := SchedulerManager{ + rayConfigs: rayConfigs, + config: config, + factory: factory, + scheduler: scheduler, } -} -type SchedulerManager struct { - sync.Mutex - config *rest.Config - plugins 
map[string]schedulerinterface.BatchScheduler + return &manager, nil } -func NewSchedulerManager(config *rest.Config) *SchedulerManager { - manager := SchedulerManager{ - config: config, - plugins: make(map[string]schedulerinterface.BatchScheduler), +func getSchedulerFactory(rayConfigs configapi.Configuration) (schedulerinterface.BatchSchedulerFactory, error) { + var factory schedulerinterface.BatchSchedulerFactory + + // when a batch scheduler name is provided + // only support a white list of names, empty value is the default value + // it throws error if an unknown name is provided + if len(rayConfigs.BatchScheduler) > 0 { + switch rayConfigs.BatchScheduler { + case volcano.GetPluginName(): + factory = &volcano.VolcanoBatchSchedulerFactory{} + case yunikorn.GetPluginName(): + factory = &yunikorn.YuniKornSchedulerFactory{} + default: + return nil, fmt.Errorf("the scheduler is not supported, name=%s", rayConfigs.BatchScheduler) + } + } else { + // empty is the default value, when not set + // use DefaultBatchSchedulerFactory, it's a no-op factory + factory = &schedulerinterface.DefaultBatchSchedulerFactory{} } - return &manager -} -func (batch *SchedulerManager) GetSchedulerForCluster(app *rayv1.RayCluster) (schedulerinterface.BatchScheduler, error) { - if schedulerName, ok := app.ObjectMeta.Labels[utils.RaySchedulerName]; ok { - return batch.GetScheduler(schedulerName) + // legacy option, if this is enabled, register volcano + // this is for backward compatibility + if rayConfigs.EnableBatchScheduler { + factory = &volcano.VolcanoBatchSchedulerFactory{} } - // no scheduler provided - return &schedulerinterface.DefaultBatchScheduler{}, nil + return factory, nil } -func (batch *SchedulerManager) GetScheduler(schedulerName string) (schedulerinterface.BatchScheduler, error) { - factory, registered := schedulerContainers[schedulerName] - if !registered { - return nil, fmt.Errorf("unregistered scheduler plugin %s", schedulerName) - } +func (batch *SchedulerManager)
GetSchedulerForCluster() (schedulerinterface.BatchScheduler, error) { + return batch.scheduler, nil +} - batch.Lock() - defer batch.Unlock() +func (batch *SchedulerManager) ConfigureReconciler(b *builder.Builder) *builder.Builder { + batch.factory.ConfigureReconciler(b) + return b +} - if plugin, existed := batch.plugins[schedulerName]; existed && plugin != nil { - return plugin, nil - } else if existed && plugin == nil { - return nil, fmt.Errorf( - "failed to get scheduler plugin %s, previous initialization has failed", schedulerName) - } else { - if plugin, err := factory.New(batch.config); err != nil { - batch.plugins[schedulerName] = nil - return nil, err - } else { - batch.plugins[schedulerName] = plugin - return plugin, nil - } - } +func (batch *SchedulerManager) AddToScheme(scheme *runtime.Scheme) { + batch.factory.AddToScheme(scheme) } diff --git a/ray-operator/controllers/ray/batchscheduler/schedulermanager_test.go b/ray-operator/controllers/ray/batchscheduler/schedulermanager_test.go new file mode 100644 index 00000000000..1eb18255f15 --- /dev/null +++ b/ray-operator/controllers/ray/batchscheduler/schedulermanager_test.go @@ -0,0 +1,143 @@ +package batchscheduler + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/ray-project/kuberay/ray-operator/apis/config/v1alpha1" + schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/yunikorn" +) + +func TestGetSchedulerFactory(t *testing.T) { + DefaultFactory := &schedulerinterface.DefaultBatchSchedulerFactory{} + VolcanoFactory := &volcano.VolcanoBatchSchedulerFactory{} + YuniKornFactory := &yunikorn.YuniKornSchedulerFactory{} + + type args struct { + rayConfigs v1alpha1.Configuration + } + tests := []struct { + want reflect.Type + name string + expectedErrMsg 
string + args args + }{ + { + name: "enableBatchScheduler=false, batchScheduler=''", + args: args{ + rayConfigs: v1alpha1.Configuration{ + EnableBatchScheduler: false, + BatchScheduler: "", + }, + }, + want: reflect.TypeOf(DefaultFactory), + }, + { + name: "enableBatchScheduler=false, batchScheduler not set", + args: args{ + rayConfigs: v1alpha1.Configuration{ + EnableBatchScheduler: false, + }, + }, + want: reflect.TypeOf(DefaultFactory), + }, + { + name: "enableBatchScheduler=false, batchScheduler set to yunikorn", + args: args{ + rayConfigs: v1alpha1.Configuration{ + EnableBatchScheduler: false, + BatchScheduler: yunikorn.GetPluginName(), + }, + }, + want: reflect.TypeOf(YuniKornFactory), + }, + { + name: "enableBatchScheduler=false, batchScheduler set to volcano", + args: args{ + rayConfigs: v1alpha1.Configuration{ + EnableBatchScheduler: false, + BatchScheduler: volcano.GetPluginName(), + }, + }, + want: reflect.TypeOf(VolcanoFactory), + }, + { + name: "enableBatchScheduler not set, batchScheduler set to yunikorn", + args: args{ + rayConfigs: v1alpha1.Configuration{ + BatchScheduler: yunikorn.GetPluginName(), + }, + }, + want: reflect.TypeOf(YuniKornFactory), + }, + { + name: "enableBatchScheduler not set, batchScheduler set to volcano", + args: args{ + rayConfigs: v1alpha1.Configuration{ + BatchScheduler: volcano.GetPluginName(), + }, + }, + want: reflect.TypeOf(VolcanoFactory), + }, + { + name: "enableBatchScheduler not set, batchScheduler set to unknown value", + args: args{ + rayConfigs: v1alpha1.Configuration{ + BatchScheduler: "unknown-scheduler-name", + }, + }, + expectedErrMsg: "the scheduler is not supported, name=unknown-scheduler-name", + }, + { + // for backwards compatibility, if enableBatchScheduler=true, always use volcano + name: "enableBatchScheduler=true, batchScheduler set to yunikorn", + args: args{ + rayConfigs: v1alpha1.Configuration{ + EnableBatchScheduler: true, + BatchScheduler: yunikorn.GetPluginName(), + }, + }, + want: 
reflect.TypeOf(VolcanoFactory), + }, + { + // for backwards compatibility, if enableBatchScheduler=true, always use volcano + name: "enableBatchScheduler=true, batchScheduler set to volcano", + args: args{ + rayConfigs: v1alpha1.Configuration{ + EnableBatchScheduler: true, + BatchScheduler: volcano.GetPluginName(), + }, + }, + want: reflect.TypeOf(VolcanoFactory), + }, + { + // for backwards compatibility, if enableBatchScheduler=true, always use volcano + name: "enableBatchScheduler=true, batchScheduler=''", + args: args{ + rayConfigs: v1alpha1.Configuration{ + EnableBatchScheduler: true, + BatchScheduler: "", + }, + }, + want: reflect.TypeOf(VolcanoFactory), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := getSchedulerFactory(tt.args.rayConfigs) + if len(tt.expectedErrMsg) > 0 { + assert.EqualError(t, err, tt.expectedErrMsg) + return + } + + if reflect.TypeOf(got) != tt.want { + t.Errorf("getSchedulerFactory() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/ray-operator/controllers/ray/batchscheduler/volcano/volcano_scheduler.go b/ray-operator/controllers/ray/batchscheduler/volcano/volcano_scheduler.go index 7829cd20737..6c7763ab9db 100644 --- a/ray-operator/controllers/ray/batchscheduler/volcano/volcano_scheduler.go +++ b/ray-operator/controllers/ray/batchscheduler/volcano/volcano_scheduler.go @@ -12,17 +12,19 @@ import ( "k8s.io/client-go/rest" "github.com/go-logr/logr" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "sigs.k8s.io/controller-runtime/pkg/builder" volcanov1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1" "volcano.sh/apis/pkg/apis/scheduling/v1beta1" volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned" - schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + utilruntime
"k8s.io/apimachinery/pkg/util/runtime" quotav1 "k8s.io/apiserver/pkg/quota/v1" logf "sigs.k8s.io/controller-runtime/pkg/log" + + schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) const ( @@ -49,7 +51,7 @@ func (v *VolcanoBatchScheduler) Name() string { func (v *VolcanoBatchScheduler) DoBatchSchedulingOnSubmission(ctx context.Context, app *rayv1.RayCluster) error { var minMember int32 var totalResource corev1.ResourceList - if app.Spec.EnableInTreeAutoscaling == nil || !*app.Spec.EnableInTreeAutoscaling { + if !utils.IsAutoscalingEnabled(app) { minMember = utils.CalculateDesiredReplicas(ctx, app) + 1 totalResource = utils.CalculateDesiredResources(app) } else { @@ -57,10 +59,7 @@ func (v *VolcanoBatchScheduler) DoBatchSchedulingOnSubmission(ctx context.Contex totalResource = utils.CalculateMinResources(app) } - if err := v.syncPodGroup(app, minMember, totalResource); err != nil { - return err - } - return nil + return v.syncPodGroup(app, minMember, totalResource) } func getAppPodGroupName(app *rayv1.RayCluster) string { @@ -135,7 +134,7 @@ func createPodGroup( return podGroup } -func (v *VolcanoBatchScheduler) AddMetadataToPod(app *rayv1.RayCluster, groupName string, pod *corev1.Pod) { +func (v *VolcanoBatchScheduler) AddMetadataToPod(_ context.Context, app *rayv1.RayCluster, groupName string, pod *corev1.Pod) { pod.Annotations[v1beta1.KubeGroupNameAnnotationKey] = getAppPodGroupName(app) pod.Annotations[volcanov1alpha1.TaskSpecKey] = groupName if queue, ok := app.ObjectMeta.Labels[QueueNameLabelKey]; ok { @@ -150,12 +149,12 @@ func (v *VolcanoBatchScheduler) AddMetadataToPod(app *rayv1.RayCluster, groupNam func (vf *VolcanoBatchSchedulerFactory) New(config *rest.Config) (schedulerinterface.BatchScheduler, error) { vkClient, err := volcanoclient.NewForConfig(config) if err != nil { - return nil, fmt.Errorf("failed to initialize volcano 
client with error %v", err) + return nil, fmt.Errorf("failed to initialize volcano client with error %w", err) } extClient, err := apiextensionsclient.NewForConfig(config) if err != nil { - return nil, fmt.Errorf("failed to initialize k8s extension client with error %v", err) + return nil, fmt.Errorf("failed to initialize k8s extension client with error %w", err) } if _, err := extClient.ApiextensionsV1().CustomResourceDefinitions().Get( @@ -168,7 +167,7 @@ func (vf *VolcanoBatchSchedulerFactory) New(config *rest.Config) (schedulerinter PodGroupName, metav1.GetOptions{}, ); err != nil { - return nil, fmt.Errorf("podGroup CRD is required to exist in current cluster. error: %s", err) + return nil, fmt.Errorf("podGroup CRD is required to exist in current cluster. error: %w", err) } } return &VolcanoBatchScheduler{ diff --git a/ray-operator/controllers/ray/batchscheduler/volcano/volcano_scheduler_test.go b/ray-operator/controllers/ray/batchscheduler/volcano/volcano_scheduler_test.go index 3d6f6f4538d..2023a408202 100644 --- a/ray-operator/controllers/ray/batchscheduler/volcano/volcano_scheduler_test.go +++ b/ray-operator/controllers/ray/batchscheduler/volcano/volcano_scheduler_test.go @@ -4,13 +4,14 @@ import ( "context" "testing" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) func TestCreatePodGroup(t *testing.T) { @@ -69,9 +70,9 @@ func TestCreatePodGroup(t *testing.T) { Template: corev1.PodTemplateSpec{ Spec: workerSpec, }, - Replicas: pointer.Int32(2), - MinReplicas: pointer.Int32(1), - MaxReplicas: pointer.Int32(4), + Replicas: ptr.To[int32](2), + 
MinReplicas: ptr.To[int32](1), + MaxReplicas: ptr.To[int32](4), }, }, }, diff --git a/ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_scheduler.go b/ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_scheduler.go new file mode 100644 index 00000000000..beeef73b60c --- /dev/null +++ b/ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_scheduler.go @@ -0,0 +1,127 @@ +package yunikorn + +import ( + "context" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" +) + +const ( + SchedulerName string = "yunikorn" + YuniKornPodApplicationIDLabelName string = "applicationId" + YuniKornPodQueueLabelName string = "queue" + RayClusterApplicationIDLabelName string = "yunikorn.apache.org/app-id" + RayClusterQueueLabelName string = "yunikorn.apache.org/queue" + YuniKornTaskGroupNameAnnotationName string = "yunikorn.apache.org/task-group-name" + YuniKornTaskGroupsAnnotationName string = "yunikorn.apache.org/task-groups" +) + +type YuniKornScheduler struct{} + +type YuniKornSchedulerFactory struct{} + +func GetPluginName() string { + return SchedulerName +} + +func (y *YuniKornScheduler) Name() string { + return GetPluginName() +} + +func (y *YuniKornScheduler) DoBatchSchedulingOnSubmission(_ context.Context, _ *rayv1.RayCluster) error { + // yunikorn doesn't require any resources to be created upfront + // this is a no-op for this implementation + return nil +} + +// populatePodLabels is a helper function that copies RayCluster's label to the given pod based on the label key +// TODO: remove the legacy labels, i.e "applicationId" and "queue", directly populate labels +//
RayClusterApplicationIDLabelName to RayClusterQueueLabelName to pod labels. +// Currently we use this function to translate labels "yunikorn.apache.org/app-id" and "yunikorn.apache.org/queue" +// to legacy labels "applicationId" and "queue", this is for better compatibility with older yunikorn +// versions. +func (y *YuniKornScheduler) populatePodLabels(ctx context.Context, app *rayv1.RayCluster, pod *corev1.Pod, sourceKey string, targetKey string) { + logger := ctrl.LoggerFrom(ctx).WithName(SchedulerName) + // check labels + if value, exist := app.Labels[sourceKey]; exist { + logger.Info("Updating pod label based on RayCluster labels", + "sourceKey", sourceKey, "targetKey", targetKey, "value", value) + // a nil map cannot be written to, so make sure the label map exists first + if pod.Labels == nil { + pod.Labels = make(map[string]string) + } + pod.Labels[targetKey] = value + } +} + +// AddMetadataToPod adds essential labels and annotations to the Ray pods +// the yunikorn scheduler needs these labels and annotations in order to do the scheduling properly +func (y *YuniKornScheduler) AddMetadataToPod(ctx context.Context, app *rayv1.RayCluster, groupName string, pod *corev1.Pod) { + // the applicationID and queue name must be provided in the labels + y.populatePodLabels(ctx, app, pod, RayClusterApplicationIDLabelName, YuniKornPodApplicationIDLabelName) + y.populatePodLabels(ctx, app, pod, RayClusterQueueLabelName, YuniKornPodQueueLabelName) + pod.Spec.SchedulerName = y.Name() + + // when gang scheduling is enabled, extra annotations need to be added to all pods + if y.isGangSchedulingEnabled(app) { + // populate the taskGroups info to each pod + y.populateTaskGroupsAnnotationToPod(ctx, app, pod) + + // populateTaskGroupsAnnotationToPod returns early on a marshal error without + // creating the map, so make sure it exists before writing to it + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) + } + // set the task group name based on the head or worker group name + // the group name for the head and each of the worker group should be different + pod.Annotations[YuniKornTaskGroupNameAnnotationName] = groupName + } +} + +func (y *YuniKornScheduler) isGangSchedulingEnabled(app *rayv1.RayCluster) bool { + _, exist := app.Labels[utils.RayClusterGangSchedulingEnabled] + return exist +} + +func (y
*YuniKornScheduler) populateTaskGroupsAnnotationToPod(ctx context.Context, app *rayv1.RayCluster, pod *corev1.Pod) { + logger := ctrl.LoggerFrom(ctx).WithName(SchedulerName) + taskGroups := newTaskGroupsFromApp(app) + taskGroupsAnnotationValue, err := taskGroups.marshal() + if err != nil { + logger.Error(err, "failed to add gang scheduling related annotations to pod, "+ + "gang scheduling will not be enabled for this workload", + "name", pod.Name, "namespace", pod.Namespace) + return + } + + logger.Info("add task groups info to pod's annotation", + "key", YuniKornTaskGroupsAnnotationName, + "value", taskGroupsAnnotationValue, + "numOfTaskGroups", taskGroups.size()) + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) + } + pod.Annotations[YuniKornTaskGroupsAnnotationName] = taskGroupsAnnotationValue + + logger.Info("Gang Scheduling enabled for RayCluster") +} + +func (yf *YuniKornSchedulerFactory) New(_ *rest.Config) (schedulerinterface.BatchScheduler, error) { + return &YuniKornScheduler{}, nil +} + +func (yf *YuniKornSchedulerFactory) AddToScheme(_ *runtime.Scheme) { + // No extra scheme needs to be registered +} + +func (yf *YuniKornSchedulerFactory) ConfigureReconciler(b *builder.Builder) *builder.Builder { + return b +} diff --git a/ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_scheduler_test.go b/ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_scheduler_test.go new file mode 100644 index 00000000000..dfd37d78705 --- /dev/null +++ b/ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_scheduler_test.go @@ -0,0 +1,283 @@ +package yunikorn + +import ( + "context" + "encoding/json" + "fmt" + "testing" + + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +func 
TestPopulatePodLabels(t *testing.T) { + yk := &YuniKornScheduler{} + ctx := context.Background() + // --- case 1 + // Ray Cluster CR has labels defined + job1 := "job-1-01234" + queue1 := "root.default" + + rayCluster1 := createRayClusterWithLabels( + "ray-cluster-with-labels", + "test", + map[string]string{ + RayClusterApplicationIDLabelName: job1, + RayClusterQueueLabelName: queue1, + }, + ) + + rayPod := createPod("my-pod-1", "test") + yk.populatePodLabels(ctx, rayCluster1, rayPod, RayClusterApplicationIDLabelName, YuniKornPodApplicationIDLabelName) + yk.populatePodLabels(ctx, rayCluster1, rayPod, RayClusterQueueLabelName, YuniKornPodQueueLabelName) + assert.Equal(t, podLabelsContains(rayPod, YuniKornPodApplicationIDLabelName, job1), true) + assert.Equal(t, podLabelsContains(rayPod, YuniKornPodQueueLabelName, queue1), true) + + // --- case 2 + // Ray Cluster CR has nothing + // In this case, the pod will not be populated with the required labels + job2 := "job-2-01234" + queue2 := "root.default" + + rayCluster2 := createRayClusterWithLabels( + "ray-cluster-without-labels", + "test1", + nil, // empty labels + ) + rayPod3 := createPod("my-pod-2", "test") + yk.populatePodLabels(ctx, rayCluster2, rayPod3, RayClusterApplicationIDLabelName, YuniKornPodApplicationIDLabelName) + yk.populatePodLabels(ctx, rayCluster2, rayPod3, RayClusterQueueLabelName, YuniKornPodQueueLabelName) + assert.Equal(t, podLabelsContains(rayPod3, YuniKornPodApplicationIDLabelName, job2), false) + assert.Equal(t, podLabelsContains(rayPod3, YuniKornPodQueueLabelName, queue2), false) +} + +func TestIsGangSchedulingEnabled(t *testing.T) { + yk := &YuniKornScheduler{} + + job1 := "job-1-01234" + queue1 := "root.default" + rayCluster1 := createRayClusterWithLabels( + "ray-cluster-with-gang-scheduling", + "test1", + map[string]string{ + RayClusterApplicationIDLabelName: job1, + RayClusterQueueLabelName: queue1, + utils.RayClusterGangSchedulingEnabled: "true", + }, + ) + + assert.Equal(t, 
yk.isGangSchedulingEnabled(rayCluster1), true) + + rayCluster2 := createRayClusterWithLabels( + "ray-cluster-with-gang-scheduling", + "test2", + map[string]string{ + RayClusterApplicationIDLabelName: job1, + RayClusterQueueLabelName: queue1, + utils.RayClusterGangSchedulingEnabled: "", + }, + ) + + assert.Equal(t, yk.isGangSchedulingEnabled(rayCluster2), true) + + rayCluster3 := createRayClusterWithLabels( + "ray-cluster-with-gang-scheduling", + "test3", + map[string]string{ + RayClusterApplicationIDLabelName: job1, + RayClusterQueueLabelName: queue1, + }, + ) + + assert.Equal(t, yk.isGangSchedulingEnabled(rayCluster3), false) +} + +func TestPopulateGangSchedulingAnnotations(t *testing.T) { + yk := &YuniKornScheduler{} + ctx := context.Background() + + job1 := "job-1-01234" + queue1 := "root.default" + + // test the case when gang-scheduling is enabled + rayClusterWithGangScheduling := createRayClusterWithLabels( + "ray-cluster-with-gang-scheduling", + "test3", + map[string]string{ + RayClusterApplicationIDLabelName: job1, + RayClusterQueueLabelName: queue1, + utils.RayClusterGangSchedulingEnabled: "true", + }, + ) + + // head pod: + // cpu: 5 + // memory: 5Gi + addHeadPodSpec(rayClusterWithGangScheduling, v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5"), + v1.ResourceMemory: resource.MustParse("5Gi"), + }) + + // worker pod: + // cpu: 2 + // memory: 10Gi + // nvidia.com/gpu: 1 + addWorkerPodSpec(rayClusterWithGangScheduling, + "worker-group-1", 1, 1, 2, v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("10Gi"), + "nvidia.com/gpu": resource.MustParse("1"), + }) + + // gang-scheduling enabled case, the plugin should populate the taskGroup annotation to the app + rayPod := createPod("ray-pod", "default") + yk.populateTaskGroupsAnnotationToPod(ctx, rayClusterWithGangScheduling, rayPod) + + kk, err := getTaskGroupsFromAnnotation(rayPod) + assert.NoError(t, err) + assert.Equal(t, len(kk), 2) + // verify the 
annotation value + taskGroupsSpec := rayPod.Annotations[YuniKornTaskGroupsAnnotationName] + assert.Equal(t, true, len(taskGroupsSpec) > 0) + taskGroups := newTaskGroups() + err = taskGroups.unmarshalFrom(taskGroupsSpec) + assert.NoError(t, err) + assert.Equal(t, len(taskGroups.Groups), 2) + + // verify the correctness of head group + headGroup := taskGroups.getTaskGroup(utils.RayNodeHeadGroupLabelValue) + assert.NotNil(t, headGroup) + assert.Equal(t, int32(1), headGroup.MinMember) + assert.Equal(t, resource.MustParse("5"), headGroup.MinResource[v1.ResourceCPU.String()]) + assert.Equal(t, resource.MustParse("5Gi"), headGroup.MinResource[v1.ResourceMemory.String()]) + + // verify the correctness of worker group + workerGroup := taskGroups.getTaskGroup("worker-group-1") + assert.NotNil(t, workerGroup) + assert.Equal(t, int32(1), workerGroup.MinMember) + assert.Equal(t, resource.MustParse("2"), workerGroup.MinResource[v1.ResourceCPU.String()]) + assert.Equal(t, resource.MustParse("10Gi"), workerGroup.MinResource[v1.ResourceMemory.String()]) + assert.Equal(t, resource.MustParse("1"), workerGroup.MinResource["nvidia.com/gpu"]) +} + +func createRayClusterWithLabels(name string, namespace string, labels map[string]string) *rayv1.RayCluster { + rayCluster := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: labels, + }, + } + + return rayCluster +} + +func addHeadPodSpec(app *rayv1.RayCluster, resource v1.ResourceList) { + // app.Spec.HeadGroupSpec.Template.Spec.Containers + headContainers := []v1.Container{ + { + Name: "head-pod", + Image: "ray.io/ray-head:latest", + Resources: v1.ResourceRequirements{ + Limits: nil, + Requests: resource, + }, + }, + } + + app.Spec.HeadGroupSpec.Template.Spec.Containers = headContainers +} + +func addWorkerPodSpec(app *rayv1.RayCluster, workerGroupName string, + replicas int32, minReplicas int32, maxReplicas int32, resources v1.ResourceList, +) { + workerContainers := []v1.Container{ + { + 
Name: "worker-pod", + Image: "ray.io/ray-head:latest", + Resources: v1.ResourceRequirements{ + Limits: nil, + Requests: resources, + }, + }, + } + + app.Spec.WorkerGroupSpecs = append(app.Spec.WorkerGroupSpecs, rayv1.WorkerGroupSpec{ + GroupName: workerGroupName, + Replicas: &replicas, + MinReplicas: &minReplicas, + MaxReplicas: &maxReplicas, + Template: v1.PodTemplateSpec{ + Spec: v1.PodSpec{ + Containers: workerContainers, + }, + }, + }) +} + +func createPod(name string, namespace string) *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: make(map[string]string), + Annotations: make(map[string]string), + }, + } +} + +func podLabelsContains(pod *v1.Pod, key string, value string) bool { + if pod == nil { + return false + } + + if len(pod.Labels) > 0 { + labelValue, exist := pod.Labels[key] + if exist { + if labelValue == value { + return true + } + } + } + + return false +} + +func getTaskGroupsFromAnnotation(pod *v1.Pod) ([]TaskGroup, error) { + taskGroupInfo, exist := pod.Annotations[YuniKornTaskGroupsAnnotationName] + if !exist { + return nil, fmt.Errorf("not found") + } + + taskGroups := []TaskGroup{} + err := json.Unmarshal([]byte(taskGroupInfo), &taskGroups) + if err != nil { + return nil, err + } + // json.Unmarshal won't return error if name or MinMember is empty, but will return error if MinResource is empty or error format. 
+ for _, taskGroup := range taskGroups { + if taskGroup.Name == "" { + return nil, fmt.Errorf("can't get taskGroup Name from pod annotation, %s", + taskGroupInfo) + } + if taskGroup.MinResource == nil { + return nil, fmt.Errorf("can't get taskGroup MinResource from pod annotation, %s", + taskGroupInfo) + } + if taskGroup.MinMember == int32(0) { + return nil, fmt.Errorf("can't get taskGroup MinMember from pod annotation, %s", + taskGroupInfo) + } + if taskGroup.MinMember < int32(0) { + return nil, fmt.Errorf("minMember cannot be negative, %s", + taskGroupInfo) + } + } + return taskGroups, nil +} diff --git a/ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_task_groups.go b/ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_task_groups.go new file mode 100644 index 00000000000..57a5914c530 --- /dev/null +++ b/ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_task_groups.go @@ -0,0 +1,97 @@ +package yunikorn + +import ( + "encoding/json" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + + v1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" +) + +// TaskGroups is a list of task Groups recognized as gang Groups +type TaskGroups struct { + Groups []TaskGroup `json:"groups"` +} + +// TaskGroup is the struct for yunikorn to consider a pod belongs to a gang group +// the original schema is defined here: https://github.com/apache/yunikorn-k8shim/blob/master/pkg/cache/amprotocol.go +type TaskGroup struct { + MinResource map[string]resource.Quantity `json:"minResource"` + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + Affinity *corev1.Affinity `json:"affinity,omitempty"` + Name string `json:"name"` + TopologySpreadConstraints []corev1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"` + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + MinMember int32 `json:"minMember"` +} + +func 
newTaskGroups() *TaskGroups { + return &TaskGroups{ + Groups: make([]TaskGroup, 0), + } +} + +func newTaskGroupsFromApp(app *v1.RayCluster) *TaskGroups { + taskGroups := newTaskGroups() + + // head group + headGroupSpec := app.Spec.HeadGroupSpec + headPodMinResource := utils.CalculatePodResource(headGroupSpec.Template.Spec) + taskGroups.addTaskGroup( + TaskGroup{ + Name: utils.RayNodeHeadGroupLabelValue, + MinMember: 1, + MinResource: utils.ConvertResourceListToMapString(headPodMinResource), + NodeSelector: headGroupSpec.Template.Spec.NodeSelector, + Tolerations: headGroupSpec.Template.Spec.Tolerations, + Affinity: headGroupSpec.Template.Spec.Affinity, + }) + + // worker groups + for _, workerGroupSpec := range app.Spec.WorkerGroupSpecs { + workerMinResource := utils.CalculatePodResource(workerGroupSpec.Template.Spec) + minWorkers := workerGroupSpec.MinReplicas + taskGroups.addTaskGroup( + TaskGroup{ + Name: workerGroupSpec.GroupName, + MinMember: *minWorkers, + MinResource: utils.ConvertResourceListToMapString(workerMinResource), + NodeSelector: workerGroupSpec.Template.Spec.NodeSelector, + Tolerations: workerGroupSpec.Template.Spec.Tolerations, + Affinity: workerGroupSpec.Template.Spec.Affinity, + }) + } + + return taskGroups +} + +func (t *TaskGroups) size() int { + return len(t.Groups) +} + +func (t *TaskGroups) addTaskGroup(taskGroup TaskGroup) { + t.Groups = append(t.Groups, taskGroup) +} + +func (t *TaskGroups) marshal() (string, error) { + result, err := json.Marshal(t.Groups) + if err != nil { + return "", err + } + return string(result), nil +} + +func (t *TaskGroups) unmarshalFrom(spec string) error { + return json.Unmarshal([]byte(spec), &t.Groups) +} + +func (t *TaskGroups) getTaskGroup(name string) TaskGroup { + for _, group := range t.Groups { + if group.Name == name { + return group + } + } + return TaskGroup{} +} diff --git a/ray-operator/controllers/ray/common/association.go b/ray-operator/controllers/ray/common/association.go index 
b804df814e8..63eefa94bc4 100644 --- a/ray-operator/controllers/ray/common/association.go +++ b/ray-operator/controllers/ray/common/association.go @@ -1,10 +1,17 @@ package common import ( - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) func RayClusterServeServiceNamespacedName(instance *rayv1.RayCluster) types.NamespacedName { @@ -33,6 +40,17 @@ func RayClusterHeadlessServiceListOptions(instance *rayv1.RayCluster) []client.L } } +func RayClusterHeadServiceListOptions(instance *rayv1.RayCluster) []client.ListOption { + return []client.ListOption{ + client.InNamespace(instance.Namespace), + client.MatchingLabels(map[string]string{ + utils.RayClusterLabelKey: instance.Name, + utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + utils.RayIDLabelKey: utils.CheckLabel(utils.GenerateIdentifier(instance.Name, rayv1.HeadNode)), + }), + } +} + type AssociationOption interface { client.ListOption client.DeleteAllOfOption @@ -54,6 +72,14 @@ func (list AssociationOptions) ToDeleteOptions() (options []client.DeleteAllOfOp return options } +func (list AssociationOptions) ToMetaV1ListOptions() (options metav1.ListOptions) { + listOptions := client.ListOptions{} + for _, option := range list { + option.(client.ListOption).ApplyToList(&listOptions) + } + return *listOptions.AsListOptions() +} + func RayClusterHeadPodsAssociationOptions(instance *rayv1.RayCluster) AssociationOptions { return AssociationOptions{ client.InNamespace(instance.Namespace), @@ -74,6 +100,16 @@ func RayClusterWorkerPodsAssociationOptions(instance *rayv1.RayCluster) 
Associat } } +func RayClusterRedisCleanupJobAssociationOptions(instance *rayv1.RayCluster) AssociationOptions { + return AssociationOptions{ + client.InNamespace(instance.Namespace), + client.MatchingLabels{ + utils.RayClusterLabelKey: instance.Name, + utils.RayNodeTypeLabelKey: string(rayv1.RedisCleanupNode), + }, + } +} + func RayClusterGroupPodsAssociationOptions(instance *rayv1.RayCluster, group string) AssociationOptions { return AssociationOptions{ client.InNamespace(instance.Namespace), @@ -93,6 +129,16 @@ func RayClusterAllPodsAssociationOptions(instance *rayv1.RayCluster) Association } } +func RayServiceRayClustersAssociationOptions(rayService *rayv1.RayService) AssociationOptions { + return AssociationOptions{ + client.InNamespace(rayService.Namespace), + client.MatchingLabels{ + utils.RayOriginatedFromCRNameLabelKey: rayService.Name, + utils.RayOriginatedFromCRDLabelKey: utils.RayOriginatedFromCRDLabelValue(utils.RayServiceCRD), + }, + } +} + func RayServiceServeServiceNamespacedName(rayService *rayv1.RayService) types.NamespacedName { if rayService.Spec.ServeService != nil && rayService.Spec.ServeService.Name != "" { return types.NamespacedName{ @@ -128,3 +174,32 @@ func RayJobRayClusterNamespacedName(rayJob *rayv1.RayJob) types.NamespacedName { Namespace: rayJob.Namespace, } } + +// GetRayClusterHeadPod gets a *corev1.Pod from a *rayv1.RayCluster. Note that it returns (nil, nil) in the case of no head pod exists. 
+func GetRayClusterHeadPod(ctx context.Context, reader client.Reader, instance *rayv1.RayCluster) (*corev1.Pod, error) { + logger := ctrl.LoggerFrom(ctx) + + runtimePods := corev1.PodList{} + filterLabels := RayClusterHeadPodsAssociationOptions(instance) + if err := reader.List(ctx, &runtimePods, filterLabels.ToListOptions()...); err != nil { + return nil, err + } + if len(runtimePods.Items) == 0 { + logger.Info("Found 0 head pod", "filter labels", filterLabels) + return nil, nil + } + if len(runtimePods.Items) > 1 { + logger.Info("Found multiple head pods", "count", len(runtimePods.Items), "filter labels", filterLabels) + return nil, fmt.Errorf("found multiple heads. filter labels %v", filterLabels) + } + return &runtimePods.Items[0], nil +} + +func RayClusterNetworkResourcesOptions(instance *rayv1.RayCluster) AssociationOptions { + return AssociationOptions{ + client.InNamespace(instance.Namespace), + client.MatchingLabels{ + utils.RayClusterLabelKey: instance.Name, + }, + } +} diff --git a/ray-operator/controllers/ray/common/association_test.go b/ray-operator/controllers/ray/common/association_test.go index b84af21b94a..506107a72d6 100644 --- a/ray-operator/controllers/ray/common/association_test.go +++ b/ray-operator/controllers/ray/common/association_test.go @@ -5,13 +5,17 @@ import ( "reflect" "testing" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + routev1 "github.com/openshift/api/route/v1" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) func TestRayServiceServeServiceNamespacedName(t *testing.T) { 
@@ -127,8 +131,7 @@ func TestRayClusterHeadlessServiceListOptions(t *testing.T) { Namespace: "test-ns", }, } - headlessSvc, err := BuildHeadlessServiceForRayCluster(*instance) - assert.Nil(t, err) + headlessSvc := BuildHeadlessServiceForRayCluster(*instance) rayClusterName := "" for k, v := range headlessSvc.Labels { @@ -149,6 +152,28 @@ func TestRayClusterHeadlessServiceListOptions(t *testing.T) { } } +func TestRayClusterHeadServiceListOptions(t *testing.T) { + instance := rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "raycluster", + Namespace: "test-ns", + }, + } + + labels := HeadServiceLabels(instance) + delete(labels, utils.KubernetesCreatedByLabelKey) + delete(labels, utils.KubernetesApplicationNameLabelKey) + + expected := []client.ListOption{ + client.InNamespace(instance.Namespace), + client.MatchingLabels(labels), + } + result := RayClusterHeadServiceListOptions(&instance) + if !reflect.DeepEqual(result, expected) { + t.Errorf("Expected %v, got %v", expected, result) + } +} + // TestRayServiceActiveRayClusterNamespacedName tests the function for generating a NamespacedName for a RayService's active RayCluster func TestRayServiceActiveRayClusterNamespacedName(t *testing.T) { rayService := &rayv1.RayService{ @@ -233,3 +258,109 @@ func TestRayJobRayClusterNamespacedName(t *testing.T) { t.Errorf("Expected %v, got %v", expected, result) } } + +func TestGetRayClusterHeadPod(t *testing.T) { + // Create a new scheme with CRDs, Pod, Service schemes. 
+ newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + + // Mock data + cluster := rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + } + + headPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "head-pod", + Namespace: cluster.ObjectMeta.Namespace, + Labels: map[string]string{ + utils.RayClusterLabelKey: cluster.Name, + utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + }, + }, + } + + // Initialize a fake client with newScheme and runtimeObjects. + runtimeObjects := []runtime.Object{headPod} + fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + ctx := context.TODO() + + ret, err := GetRayClusterHeadPod(ctx, fakeClient, &cluster) + assert.Nil(t, err) + assert.Equal(t, ret, headPod) +} + +func TestRayClusterRedisCleanupJobAssociationOptions(t *testing.T) { + // Create a new scheme + newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + + instance := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "raycluster-example", + Namespace: "default", + }, + } + + _ = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "redis-cleanup", + Namespace: instance.ObjectMeta.Namespace, + Labels: map[string]string{ + utils.RayClusterLabelKey: instance.Name, + utils.RayNodeTypeLabelKey: string(rayv1.RedisCleanupNode), + }, + }, + } + + expected := []client.ListOption{ + client.InNamespace(instance.ObjectMeta.Namespace), + client.MatchingLabels(map[string]string{ + utils.RayClusterLabelKey: instance.Name, + utils.RayNodeTypeLabelKey: string(rayv1.RedisCleanupNode), + }), + } + result := RayClusterRedisCleanupJobAssociationOptions(instance).ToListOptions() + + assert.Equal(t, expected, result) +} + +func TestRayClusterNetworkResourcesOptions(t *testing.T) { + newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = 
corev1.AddToScheme(newScheme) + _ = routev1.AddToScheme(newScheme) + instance := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "raycluster-example", + Namespace: "default", + Annotations: map[string]string{ + IngressClassAnnotationKey: "nginx", + }, + }, + } + _ = &routev1.Route{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.GenerateRouteName(instance.Name), + Namespace: instance.Namespace, + Labels: map[string]string{ + utils.RayClusterLabelKey: instance.Name, + }, + }, + } + expected := []client.ListOption{ + client.InNamespace(instance.ObjectMeta.Namespace), + client.MatchingLabels(map[string]string{ + utils.RayClusterLabelKey: instance.Name, + }), + } + + result := RayClusterNetworkResourcesOptions(instance).ToListOptions() + + assert.Equal(t, expected, result) +} diff --git a/ray-operator/controllers/ray/common/ingress.go b/ray-operator/controllers/ray/common/ingress.go index 151f197a541..92cdb7da16f 100644 --- a/ray-operator/controllers/ray/common/ingress.go +++ b/ray-operator/controllers/ray/common/ingress.go @@ -2,14 +2,14 @@ package common import ( "context" - "fmt" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" networkingv1 "k8s.io/api/networking/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) const IngressClassAnnotationKey = "kubernetes.io/ingress.class" @@ -27,15 +27,15 @@ func BuildIngressForHeadService(ctx context.Context, cluster rayv1.RayCluster) ( } // Copy other ingress configurations from cluster annotations to provide a generic way - // for user to customize their ingress settings. The `exclude_set` is used to avoid setting + // for user to customize their ingress settings. The `excludeSet` is used to avoid setting // both IngressClassAnnotationKey annotation which is deprecated and `Spec.IngressClassName` // at the same time. 
- exclude_set := map[string]struct{}{ + excludeSet := map[string]struct{}{ IngressClassAnnotationKey: {}, } annotation := map[string]string{} for key, value := range cluster.Annotations { - if _, ok := exclude_set[key]; !ok { + if _, ok := excludeSet[key]; !ok { annotation[key] = value } } @@ -90,7 +90,7 @@ func BuildIngressForHeadService(ctx context.Context, cluster rayv1.RayCluster) ( // Get ingress class name from rayCluster annotations. this is a required field to use ingress. ingressClassName, ok := cluster.Annotations[IngressClassAnnotationKey] if !ok { - log.Info(fmt.Sprintf("ingress class annotation is not set for cluster %s/%s", cluster.Namespace, cluster.Name)) + log.Info("Ingress class annotation is not set for the cluster.", "clusterNamespace", cluster.Namespace, "clusterName", cluster.Name) } else { // TODO: in AWS EKS, set up IngressClassName will cause an error due to conflict with annotation. ingress.Spec.IngressClassName = &ingressClassName diff --git a/ray-operator/controllers/ray/common/job.go b/ray-operator/controllers/ray/common/job.go index b932f50cd01..dc4dbe74a33 100644 --- a/ray-operator/controllers/ray/common/job.go +++ b/ray-operator/controllers/ray/common/job.go @@ -3,15 +3,18 @@ package common import ( "encoding/json" "fmt" + "strconv" "strings" semver "github.com/Masterminds/semver/v3" "github.com/google/shlex" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "sigs.k8s.io/yaml" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + pkgutils "github.com/ray-project/kuberay/ray-operator/pkg/utils" ) // GetRuntimeEnvJson returns the JSON string of the runtime environment for the Ray job. 
@@ -20,26 +23,17 @@ func getRuntimeEnvJson(rayJobInstance *rayv1.RayJob) (string, error) { if len(runtimeEnvYAML) > 0 { // Convert YAML to JSON - jsonData, err := yaml.YAMLToJSON([]byte(runtimeEnvYAML)) + jsonData, err := yaml.YAMLToJSON(pkgutils.ConvertStringToByteSlice(runtimeEnvYAML)) if err != nil { return "", err } // We return the JSON as a string - return string(jsonData), nil + return pkgutils.ConvertByteSliceToString(jsonData), nil } return "", nil } -// GetBaseRayJobCommand returns the first part of the Ray Job command up to and including the address, e.g. "ray job submit --address http://..." -func GetBaseRayJobCommand(address string) []string { - // add http:// if needed - if !strings.HasPrefix(address, "http://") { - address = "http://" + address - } - return []string{"ray", "job", "submit", "--address", address} -} - // GetMetadataJson returns the JSON string of the metadata for the Ray job. func GetMetadataJson(metadata map[string]string, rayVersion string) (string, error) { // Check that the Ray version is at least 2.6.0. @@ -48,7 +42,7 @@ func GetMetadataJson(metadata map[string]string, rayVersion string) (string, err constraint, _ := semver.NewConstraint(">= 2.6.0") v, err := semver.NewVersion(rayVersion) if err != nil { - return "", fmt.Errorf("failed to parse Ray version: %v: %v", rayVersion, err) + return "", fmt.Errorf("failed to parse Ray version: %v: %w", rayVersion, err) } if !constraint.Check(v) { return "", fmt.Errorf("the Ray version must be at least 2.6.0 to use the metadata field") @@ -56,9 +50,9 @@ func GetMetadataJson(metadata map[string]string, rayVersion string) (string, err // Convert the metadata map to a JSON string. 
metadataBytes, err := json.Marshal(metadata) if err != nil { - return "", fmt.Errorf("failed to marshal metadata: %v: %v", metadata, err) + return "", fmt.Errorf("failed to marshal metadata: %v: %w", metadata, err) } - return string(metadataBytes), nil + return pkgutils.ConvertByteSliceToString(metadataBytes), nil } // GetK8sJobCommand builds the K8s job command for the Ray job. @@ -71,14 +65,34 @@ func GetK8sJobCommand(rayJobInstance *rayv1.RayJob) ([]string, error) { entrypointNumGpus := rayJobInstance.Spec.EntrypointNumGpus entrypointResources := rayJobInstance.Spec.EntrypointResources - k8sJobCommand := GetBaseRayJobCommand(address) + // add http:// if needed + if !strings.HasPrefix(address, "http://") { + address = "http://" + address + } + + // `ray job submit` alone doesn't handle duplicated submission gracefully. See https://github.com/ray-project/kuberay/issues/2154. + // In order to deal with that, we use `ray job status` first to check if the jobId has been submitted. + // If the jobId has been submitted, we use `ray job logs` to follow the logs. + // Otherwise, we submit the job normally with `ray job submit`. The full shell command looks like this: + // if ray job status --address http://$RAY_ADDRESS $RAY_JOB_SUBMISSION_ID >/dev/null 2>&1 ; + // then ray job logs --address http://RAY_ADDRESS --follow $RAY_JOB_SUBMISSION_ID ; + // else ray job submit --address http://RAY_ADDRESS --submission-id $RAY_JOB_SUBMISSION_ID -- ... ; + // fi + jobStatusCommand := []string{"ray", "job", "status", "--address", address, jobId, ">/dev/null", "2>&1"} + jobFollowCommand := []string{"ray", "job", "logs", "--address", address, "--follow", jobId} + jobSubmitCommand := []string{"ray", "job", "submit", "--address", address} + k8sJobCommand := append([]string{"if"}, jobStatusCommand...) + k8sJobCommand = append(k8sJobCommand, ";", "then") + k8sJobCommand = append(k8sJobCommand, jobFollowCommand...) 
+ k8sJobCommand = append(k8sJobCommand, ";", "else") + k8sJobCommand = append(k8sJobCommand, jobSubmitCommand...) runtimeEnvJson, err := getRuntimeEnvJson(rayJobInstance) if err != nil { return nil, err } if len(runtimeEnvJson) > 0 { - k8sJobCommand = append(k8sJobCommand, "--runtime-env-json", runtimeEnvJson) + k8sJobCommand = append(k8sJobCommand, "--runtime-env-json", strconv.Quote(runtimeEnvJson)) } if len(metadata) > 0 { @@ -86,7 +100,7 @@ func GetK8sJobCommand(rayJobInstance *rayv1.RayJob) ([]string, error) { if err != nil { return nil, err } - k8sJobCommand = append(k8sJobCommand, "--metadata-json", metadataJson) + k8sJobCommand = append(k8sJobCommand, "--metadata-json", strconv.Quote(metadataJson)) } if len(jobId) > 0 { @@ -102,7 +116,7 @@ func GetK8sJobCommand(rayJobInstance *rayv1.RayJob) ([]string, error) { } if len(entrypointResources) > 0 { - k8sJobCommand = append(k8sJobCommand, "--entrypoint-resources", entrypointResources) + k8sJobCommand = append(k8sJobCommand, "--entrypoint-resources", strconv.Quote(entrypointResources)) } // "--" is used to separate the entrypoint from the Ray Job CLI command and its arguments. @@ -114,6 +128,8 @@ func GetK8sJobCommand(rayJobInstance *rayv1.RayJob) ([]string, error) { } k8sJobCommand = append(k8sJobCommand, commandSlice...) 
+ k8sJobCommand = append(k8sJobCommand, ";", "fi") + return k8sJobCommand, nil } diff --git a/ray-operator/controllers/ray/common/job_test.go b/ray-operator/controllers/ray/common/job_test.go index e56df1b1db5..da41eaeda41 100644 --- a/ray-operator/controllers/ray/common/job_test.go +++ b/ray-operator/controllers/ray/common/job_test.go @@ -2,12 +2,14 @@ package common import ( "encoding/json" + "strconv" "testing" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) var testRayJob = &rayv1.RayJob{ @@ -61,12 +63,6 @@ pip: ["python-multipart==0.0.6"] assert.Equal(t, expectedMap, actualMap) } -func TestGetBaseRayJobCommand(t *testing.T) { - expected := []string{"ray", "job", "submit", "--address", "http://127.0.0.1:8265"} - command := GetBaseRayJobCommand(testRayJob.Status.DashboardURL) - assert.Equal(t, expected, command) -} - func TestGetMetadataJson(t *testing.T) { expected := `{"testKey":"testValue"}` metadataJson, err := GetMetadataJson(testRayJob.Spec.Metadata, testRayJob.Spec.RayClusterSpec.RayVersion) @@ -76,15 +72,21 @@ func TestGetMetadataJson(t *testing.T) { func TestGetK8sJobCommand(t *testing.T) { expected := []string{ + "if", + "ray", "job", "status", "--address", "http://127.0.0.1:8265", "testJobId", ">/dev/null", "2>&1", + ";", "then", + "ray", "job", "logs", "--address", "http://127.0.0.1:8265", "--follow", "testJobId", + ";", "else", "ray", "job", "submit", "--address", "http://127.0.0.1:8265", - "--runtime-env-json", `{"test":"test"}`, - "--metadata-json", `{"testKey":"testValue"}`, + "--runtime-env-json", strconv.Quote(`{"test":"test"}`), + "--metadata-json", strconv.Quote(`{"testKey":"testValue"}`), "--submission-id", "testJobId", "--entrypoint-num-cpus", 
"1.000000", "--entrypoint-num-gpus", "0.500000", - "--entrypoint-resources", `{"Custom_1": 1, "Custom_2": 5.5}`, + "--entrypoint-resources", strconv.Quote(`{"Custom_1": 1, "Custom_2": 5.5}`), "--", "echo", "hello", + ";", "fi", } command, err := GetK8sJobCommand(testRayJob) assert.NoError(t, err) @@ -112,12 +114,18 @@ pip: ["python-multipart==0.0.6"] }, } expected := []string{ + "if", + "ray", "job", "status", "--address", "http://127.0.0.1:8265", "testJobId", ">/dev/null", "2>&1", + ";", "then", + "ray", "job", "logs", "--address", "http://127.0.0.1:8265", "--follow", "testJobId", + ";", "else", "ray", "job", "submit", "--address", "http://127.0.0.1:8265", - "--runtime-env-json", `{"working_dir":"https://github.com/ray-project/serve_config_examples/archive/b393e77bbd6aba0881e3d94c05f968f05a387b96.zip","pip":["python-multipart==0.0.6"]}`, - "--metadata-json", `{"testKey":"testValue"}`, + "--runtime-env-json", strconv.Quote(`{"working_dir":"https://github.com/ray-project/serve_config_examples/archive/b393e77bbd6aba0881e3d94c05f968f05a387b96.zip","pip":["python-multipart==0.0.6"]}`), + "--metadata-json", strconv.Quote(`{"testKey":"testValue"}`), "--submission-id", "testJobId", "--", "echo", "hello", + ";", "fi", } command, err := GetK8sJobCommand(rayJobWithYAML) assert.NoError(t, err) @@ -126,11 +134,17 @@ pip: ["python-multipart==0.0.6"] assert.Equal(t, len(expected), len(command)) for i := 0; i < len(expected); i++ { + // For non-JSON elements, compare them directly. + assert.Equal(t, expected[i], command[i]) if expected[i] == "--runtime-env-json" { // Decode the JSON string from the next element. 
var expectedMap, actualMap map[string]interface{} - err1 := json.Unmarshal([]byte(expected[i+1]), &expectedMap) - err2 := json.Unmarshal([]byte(command[i+1]), &actualMap) + unquoteExpected, err1 := strconv.Unquote(expected[i+1]) + assert.NoError(t, err1) + unquotedCommand, err2 := strconv.Unquote(command[i+1]) + assert.NoError(t, err2) + err1 = json.Unmarshal([]byte(unquoteExpected), &expectedMap) + err2 = json.Unmarshal([]byte(unquotedCommand), &actualMap) // If there's an error decoding either JSON string, it's an error in the test. assert.NoError(t, err1) @@ -141,9 +155,6 @@ pip: ["python-multipart==0.0.6"] // Skip the next element because we've just checked it. i++ - } else { - // For non-JSON elements, compare them directly. - assert.Equal(t, expected[i], command[i]) } } } diff --git a/ray-operator/controllers/ray/common/pod.go b/ray-operator/controllers/ray/common/pod.go index 0182eadc914..5b9beed0fd8 100644 --- a/ray-operator/controllers/ray/common/pod.go +++ b/ray-operator/controllers/ray/common/pod.go @@ -3,17 +3,19 @@ package common import ( "bytes" "context" + "encoding/json" "fmt" "os" + "sort" "strconv" "strings" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" @@ -32,27 +34,25 @@ const ( // If set to true, kuberay auto injects an init container waiting for ray GCS. // If false, you will need to inject your own init container to ensure ray GCS is up before the ray workers start. 
EnableInitContainerInjectionEnvKey = "ENABLE_INIT_CONTAINER_INJECTION" + NeuronCoreContainerResourceName = "aws.amazon.com/neuroncore" + NeuronCoreRayResourceName = "neuron_cores" + TPUContainerResourceName = "google.com/tpu" + TPURayResourceName = "TPU" ) +var customAcceleratorToRayResourceMap = map[string]string{ + NeuronCoreContainerResourceName: NeuronCoreRayResourceName, + TPUContainerResourceName: TPURayResourceName, +} + // Get the port required to connect to the Ray cluster by worker nodes and drivers // started within the cluster. // For Ray >= 1.11.0 this is the GCS server port. For Ray < 1.11.0 it is the Redis port. func GetHeadPort(headStartParams map[string]string) string { - var headPort string - if value, ok := headStartParams["port"]; !ok { - // using default port - headPort = strconv.Itoa(utils.DefaultRedisPort) - } else { - // setting port from the params - headPort = value + if value, ok := headStartParams["port"]; ok { + return value } - return headPort -} - -// Check if the RayCluster has GCS fault tolerance enabled. -func IsGCSFaultToleranceEnabled(instance rayv1.RayCluster) bool { - v, ok := instance.Annotations[utils.RayFTEnabledAnnotationKey] - return ok && strings.ToLower(v) == "true" + return strconv.Itoa(utils.DefaultGcsServerPort) } // Check if overwrites the container command. @@ -66,23 +66,95 @@ func initTemplateAnnotations(instance rayv1.RayCluster, podTemplate *corev1.PodT podTemplate.Annotations = make(map[string]string) } - // For now, we just set ray external storage enabled/disabled by checking if FT is enabled/disabled. - // This may need to be updated in the future. - if IsGCSFaultToleranceEnabled(instance) { - podTemplate.Annotations[utils.RayFTEnabledAnnotationKey] = "true" - // if we have FT enabled, we need to set up a default external storage namespace. 
- podTemplate.Annotations[utils.RayExternalStorageNSAnnotationKey] = string(instance.UID) - } else { - podTemplate.Annotations[utils.RayFTEnabledAnnotationKey] = "false" - } - if isOverwriteRayContainerCmd(instance) { podTemplate.Annotations[utils.RayOverwriteContainerCmdAnnotationKey] = "true" } - // set ray external storage namespace if user specified one. - if instance.Annotations != nil { - if v, ok := instance.Annotations[utils.RayExternalStorageNSAnnotationKey]; ok { - podTemplate.Annotations[utils.RayExternalStorageNSAnnotationKey] = v +} + +func configureGCSFaultTolerance(podTemplate *corev1.PodTemplateSpec, instance rayv1.RayCluster, rayNodeType rayv1.RayNodeType) { + // Configure environment variables, annotations, and rayStartParams for GCS fault tolerance. + // Note that both `podTemplate` and `instance` will be modified. + ftEnabled := utils.IsGCSFaultToleranceEnabled(instance) + if podTemplate.Annotations == nil { + podTemplate.Annotations = make(map[string]string) + } + + if rayNodeType == rayv1.HeadNode { + podTemplate.Annotations[utils.RayFTEnabledAnnotationKey] = strconv.FormatBool(ftEnabled) + } + + if ftEnabled { + options := instance.Spec.GcsFaultToleranceOptions + container := &podTemplate.Spec.Containers[utils.RayContainerIndex] + + // Configure the GCS RPC server reconnect timeout for GCS FT. + if !utils.EnvVarExists(utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, container.Env) && rayNodeType == rayv1.WorkerNode { + // If GCS FT is enabled and RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S is not set, set the worker's + // RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S to 600s. If the worker cannot reconnect to GCS within + // 600s, the Raylet will exit the process. By default, the value is 60s, so the head node will + // crash if the GCS server is down for more than 60s. Typically, the new GCS server will be available + // in 120 seconds, so we set the timeout to 600s to avoid the worker nodes crashing. 
+ gcsTimeout := corev1.EnvVar{Name: utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, Value: utils.DefaultWorkerRayGcsReconnectTimeoutS} + container.Env = append(container.Env, gcsTimeout) + } + + // Configure the Redis address, username and password for GCS FT. + if rayNodeType == rayv1.HeadNode { + // Configure the external storage namespace for GCS FT. + storageNS := string(instance.UID) + if v, ok := instance.Annotations[utils.RayExternalStorageNSAnnotationKey]; ok { + storageNS = v + } + if options != nil && options.ExternalStorageNamespace != "" { + storageNS = options.ExternalStorageNamespace + } + podTemplate.Annotations[utils.RayExternalStorageNSAnnotationKey] = storageNS + if !utils.EnvVarExists(utils.RAY_EXTERNAL_STORAGE_NS, container.Env) { + storageNS := corev1.EnvVar{Name: utils.RAY_EXTERNAL_STORAGE_NS, Value: storageNS} + container.Env = append(container.Env, storageNS) + } + + if options != nil { + container.Env = append(container.Env, corev1.EnvVar{ + Name: utils.RAY_REDIS_ADDRESS, + Value: options.RedisAddress, + }) + if options.RedisUsername != nil { + // Note that `redis-username` will be supported starting from Ray 2.41. + // If `GcsFaultToleranceOptions.RedisUsername` is set, it will be put into the + // `REDIS_USERNAME` environment variable later. Here, we use `$REDIS_USERNAME` in + // rayStartParams to refer to the environment variable. + instance.Spec.HeadGroupSpec.RayStartParams["redis-username"] = "$REDIS_USERNAME" + container.Env = append(container.Env, corev1.EnvVar{ + Name: utils.REDIS_USERNAME, + Value: options.RedisUsername.Value, + ValueFrom: options.RedisUsername.ValueFrom, + }) + } + if options.RedisPassword != nil { + // If `GcsFaultToleranceOptions.RedisPassword` is set, it will be put into the + // `REDIS_PASSWORD` environment variable later. Here, we use `$REDIS_PASSWORD` in + // rayStartParams to refer to the environment variable. 
+ instance.Spec.HeadGroupSpec.RayStartParams["redis-password"] = "$REDIS_PASSWORD" + container.Env = append(container.Env, corev1.EnvVar{ + Name: utils.REDIS_PASSWORD, + Value: options.RedisPassword.Value, + ValueFrom: options.RedisPassword.ValueFrom, + }) + } + } else { + // If users directly set the `redis-password` in `rayStartParams` instead of referring + // to a K8s secret, we need to set the `REDIS_PASSWORD` env var so that the Redis cleanup + // job can connect to Redis using the password. This is not recommended. + if !utils.EnvVarExists(utils.REDIS_PASSWORD, container.Env) { + // setting the REDIS_PASSWORD env var from the params + redisPasswordEnv := corev1.EnvVar{Name: utils.REDIS_PASSWORD} + if value, ok := instance.Spec.HeadGroupSpec.RayStartParams["redis-password"]; ok { + redisPasswordEnv.Value = value + container.Env = append(container.Env, redisPasswordEnv) + } + } + } } } } @@ -102,12 +174,12 @@ func DefaultHeadPodTemplate(ctx context.Context, instance rayv1.RayCluster, head podTemplate.Labels = make(map[string]string) } podTemplate.Labels = labelPod(rayv1.HeadNode, instance.Name, utils.RayNodeHeadGroupLabelValue, instance.Spec.HeadGroupSpec.Template.ObjectMeta.Labels) - headSpec.RayStartParams = setMissingRayStartParams(ctx, headSpec.RayStartParams, rayv1.HeadNode, headPort, "", instance.Annotations) + headSpec.RayStartParams = setMissingRayStartParams(ctx, headSpec.RayStartParams, rayv1.HeadNode, headPort, "") initTemplateAnnotations(instance, &podTemplate) // if in-tree autoscaling is enabled, then autoscaler container should be injected into head pod. - if instance.Spec.EnableInTreeAutoscaling != nil && *instance.Spec.EnableInTreeAutoscaling { + if utils.IsAutoscalingEnabled(&instance) { // The default autoscaler is not compatible with Kubernetes. As a result, we disable // the monitor process by default and inject a KubeRay autoscaler side container into the head pod. 
headSpec.RayStartParams["no-monitor"] = "true" @@ -123,7 +195,9 @@ func DefaultHeadPodTemplate(ctx context.Context, instance rayv1.RayCluster, head podTemplate.Spec.Containers = append(podTemplate.Spec.Containers, autoscalerContainer) } - // If the metrics port does not exist in the Ray container, add a default one for Promethues. + configureGCSFaultTolerance(&podTemplate, instance, rayv1.HeadNode) + + // If the metrics port does not exist in the Ray container, add a default one for Prometheus. isMetricsPortExists := utils.FindContainerPort(&podTemplate.Spec.Containers[utils.RayContainerIndex], utils.MetricsPortName, -1) != -1 if !isMetricsPortExists { metricsPort := corev1.ContainerPort{ @@ -187,7 +261,7 @@ func DefaultWorkerPodTemplate(ctx context.Context, instance rayv1.RayCluster, wo fi echo "$SECONDS seconds elapsed: Still waiting for GCS to be ready. For troubleshooting, refer to the FAQ at https://github.com/ray-project/kuberay/blob/master/docs/guidance/FAQ.md." fi - sleep 5 + sleep 5 done `, fqdnRayIP, headPort, fqdnRayIP, headPort), }, @@ -222,11 +296,12 @@ func DefaultWorkerPodTemplate(ctx context.Context, instance rayv1.RayCluster, wo podTemplate.Labels = make(map[string]string) } podTemplate.Labels = labelPod(rayv1.WorkerNode, instance.Name, workerSpec.GroupName, workerSpec.Template.ObjectMeta.Labels) - workerSpec.RayStartParams = setMissingRayStartParams(ctx, workerSpec.RayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, instance.Annotations) + workerSpec.RayStartParams = setMissingRayStartParams(ctx, workerSpec.RayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP) initTemplateAnnotations(instance, &podTemplate) + configureGCSFaultTolerance(&podTemplate, instance, rayv1.WorkerNode) - // If the metrics port does not exist in the Ray container, add a default one for Promethues. + // If the metrics port does not exist in the Ray container, add a default one for Prometheus. 
isMetricsPortExists := utils.FindContainerPort(&podTemplate.Spec.Containers[utils.RayContainerIndex], utils.MetricsPortName, -1) != -1 if !isMetricsPortExists { metricsPort := corev1.ContainerPort{ @@ -240,8 +315,18 @@ func DefaultWorkerPodTemplate(ctx context.Context, instance rayv1.RayCluster, wo } func initLivenessAndReadinessProbe(rayContainer *corev1.Container, rayNodeType rayv1.RayNodeType, creatorCRDType utils.CRDType) { - rayAgentRayletHealthCommand := fmt.Sprintf(utils.BaseWgetHealthCommand, utils.DefaultDashboardAgentListenPort, utils.RayAgentRayletHealthPath) - rayDashboardGCSHealthCommand := fmt.Sprintf(utils.BaseWgetHealthCommand, utils.DefaultDashboardPort, utils.RayDashboardGCSHealthPath) + rayAgentRayletHealthCommand := fmt.Sprintf( + utils.BaseWgetHealthCommand, + utils.DefaultReadinessProbeTimeoutSeconds, + utils.DefaultDashboardAgentListenPort, + utils.RayAgentRayletHealthPath, + ) + rayDashboardGCSHealthCommand := fmt.Sprintf( + utils.BaseWgetHealthCommand, + utils.DefaultReadinessProbeFailureThreshold, + utils.DefaultDashboardPort, + utils.RayDashboardGCSHealthPath, + ) // Generally, the liveness and readiness probes perform the same checks. // For head node => Check GCS and Raylet status. 
@@ -254,9 +339,14 @@ func initLivenessAndReadinessProbe(rayContainer *corev1.Container, rayNodeType r } if rayContainer.LivenessProbe == nil { + probeTimeout := int32(utils.DefaultLivenessProbeTimeoutSeconds) + if rayNodeType == rayv1.HeadNode { + probeTimeout = int32(utils.DefaultHeadLivenessProbeTimeoutSeconds) + } + rayContainer.LivenessProbe = &corev1.Probe{ InitialDelaySeconds: utils.DefaultLivenessProbeInitialDelaySeconds, - TimeoutSeconds: utils.DefaultLivenessProbeTimeoutSeconds, + TimeoutSeconds: probeTimeout, PeriodSeconds: utils.DefaultLivenessProbePeriodSeconds, SuccessThreshold: utils.DefaultLivenessProbeSuccessThreshold, FailureThreshold: utils.DefaultLivenessProbeFailureThreshold, @@ -265,9 +355,13 @@ func initLivenessAndReadinessProbe(rayContainer *corev1.Container, rayNodeType r } if rayContainer.ReadinessProbe == nil { + probeTimeout := int32(utils.DefaultReadinessProbeTimeoutSeconds) + if rayNodeType == rayv1.HeadNode { + probeTimeout = int32(utils.DefaultHeadReadinessProbeTimeoutSeconds) + } rayContainer.ReadinessProbe = &corev1.Probe{ InitialDelaySeconds: utils.DefaultReadinessProbeInitialDelaySeconds, - TimeoutSeconds: utils.DefaultReadinessProbeTimeoutSeconds, + TimeoutSeconds: probeTimeout, PeriodSeconds: utils.DefaultReadinessProbePeriodSeconds, SuccessThreshold: utils.DefaultReadinessProbeSuccessThreshold, FailureThreshold: utils.DefaultReadinessProbeFailureThreshold, @@ -279,8 +373,12 @@ func initLivenessAndReadinessProbe(rayContainer *corev1.Container, rayNodeType r // See https://github.com/ray-project/kuberay/pull/1808 for reasons. 
if creatorCRDType == utils.RayServiceCRD && rayNodeType == rayv1.WorkerNode { rayContainer.ReadinessProbe.FailureThreshold = utils.ServeReadinessProbeFailureThreshold - rayServeProxyHealthCommand := fmt.Sprintf(utils.BaseWgetHealthCommand, - utils.FindContainerPort(rayContainer, utils.ServingPortName, utils.DefaultServingPort), utils.RayServeProxyHealthPath) + rayServeProxyHealthCommand := fmt.Sprintf( + utils.BaseWgetHealthCommand, + utils.DefaultReadinessProbeInitialDelaySeconds, + utils.FindContainerPort(rayContainer, utils.ServingPortName, utils.DefaultServingPort), + utils.RayServeProxyHealthPath, + ) commands = append(commands, rayServeProxyHealthCommand) rayContainer.ReadinessProbe.Exec = &corev1.ExecAction{Command: []string{"bash", "-c", strings.Join(commands, " && ")}} } @@ -288,7 +386,7 @@ func initLivenessAndReadinessProbe(rayContainer *corev1.Container, rayNodeType r } // BuildPod a pod config -func BuildPod(ctx context.Context, podTemplateSpec corev1.PodTemplateSpec, rayNodeType rayv1.RayNodeType, rayStartParams map[string]string, headPort string, enableRayAutoscaler *bool, creatorCRDType utils.CRDType, fqdnRayIP string) (aPod corev1.Pod) { +func BuildPod(ctx context.Context, podTemplateSpec corev1.PodTemplateSpec, rayNodeType rayv1.RayNodeType, rayStartParams map[string]string, headPort string, enableRayAutoscaler bool, creatorCRDType utils.CRDType, fqdnRayIP string) (aPod corev1.Pod) { log := ctrl.LoggerFrom(ctx) // For Worker Pod: Traffic readiness is determined by the readiness probe. @@ -313,7 +411,7 @@ func BuildPod(ctx context.Context, podTemplateSpec corev1.PodTemplateSpec, rayNo // Add /dev/shm volumeMount for the object store to avoid performance degradation. 
addEmptyDir(ctx, &pod.Spec.Containers[utils.RayContainerIndex], &pod, SharedMemoryVolumeName, SharedMemoryVolumeMountPath, corev1.StorageMediumMemory) - if rayNodeType == rayv1.HeadNode && enableRayAutoscaler != nil && *enableRayAutoscaler { + if rayNodeType == rayv1.HeadNode && enableRayAutoscaler { // The Ray autoscaler writes logs which are read by the Ray head. // We need a shared log volume to enable this information flow. // Specifically, this is required for the event-logging functionality @@ -322,10 +420,6 @@ func BuildPod(ctx context.Context, podTemplateSpec corev1.PodTemplateSpec, rayNo addEmptyDir(ctx, &pod.Spec.Containers[utils.RayContainerIndex], &pod, RayLogVolumeName, RayLogVolumeMountPath, corev1.StorageMediumDefault) addEmptyDir(ctx, &pod.Spec.Containers[autoscalerContainerIndex], &pod, RayLogVolumeName, RayLogVolumeMountPath, corev1.StorageMediumDefault) } - cleanupInvalidVolumeMounts(&pod.Spec.Containers[utils.RayContainerIndex], &pod) - if len(pod.Spec.InitContainers) > utils.RayContainerIndex { - cleanupInvalidVolumeMounts(&pod.Spec.InitContainers[utils.RayContainerIndex], &pod) - } var cmd, args string if len(pod.Spec.Containers[utils.RayContainerIndex].Command) > 0 { @@ -366,7 +460,7 @@ func BuildPod(ctx context.Context, podTemplateSpec corev1.PodTemplateSpec, rayNo for index := range pod.Spec.InitContainers { setInitContainerEnvVars(&pod.Spec.InitContainers[index], fqdnRayIP) } - setContainerEnvVars(&pod, rayNodeType, rayStartParams, fqdnRayIP, headPort, rayStartCmd, creatorCRDType) + setContainerEnvVars(&pod, rayNodeType, fqdnRayIP, headPort, rayStartCmd, creatorCRDType) // Inject probes into the Ray containers if the user has not explicitly disabled them. // The feature flag `ENABLE_PROBES_INJECTION` will be removed if this feature is stable enough. 
@@ -419,14 +513,12 @@ func BuildAutoscalerContainer(autoscalerImage string) corev1.Container { }, }, Command: []string{ - "ray", + "/bin/bash", + "-lc", + "--", }, Args: []string{ - "kuberay-autoscaler", - "--cluster-name", - "$(RAY_CLUSTER_NAME)", - "--cluster-namespace", - "$(RAY_CLUSTER_NAMESPACE)", + "ray kuberay-autoscaler --cluster-name $(RAY_CLUSTER_NAME) --cluster-namespace $(RAY_CLUSTER_NAMESPACE)", }, Resources: corev1.ResourceRequirements{ Limits: corev1.ResourceList{ @@ -491,12 +583,8 @@ func getAutoscalerContainerIndex(pod corev1.Pod) (autoscalerContainerIndex int) // labelPod returns the labels for selecting the resources // belonging to the given RayCluster CR name. -func labelPod(rayNodeType rayv1.RayNodeType, rayClusterName string, groupName string, labels map[string]string) (ret map[string]string) { - if labels == nil { - labels = make(map[string]string) - } - - ret = map[string]string{ +func labelPod(rayNodeType rayv1.RayNodeType, rayClusterName string, groupName string, overrideLabels map[string]string) map[string]string { + labels := map[string]string{ utils.RayNodeLabelKey: "yes", utils.RayClusterLabelKey: rayClusterName, utils.RayNodeTypeLabelKey: string(rayNodeType), @@ -506,29 +594,23 @@ func labelPod(rayNodeType rayv1.RayNodeType, rayClusterName string, groupName st utils.KubernetesCreatedByLabelKey: utils.ComponentName, } - for k, v := range ret { - if k == string(rayNodeType) { - // overriding invalid values for this label - if v != string(rayv1.HeadNode) && v != string(rayv1.WorkerNode) { - labels[k] = v - } - } - if k == utils.RayNodeGroupLabelKey { - // overriding invalid values for this label - if v != groupName { - labels[k] = v - } - } - if _, ok := labels[k]; !ok { - labels[k] = v + for k, v := range overrideLabels { + // The following labels are not overridable + // - ray.io/node-type + // - ray.io/group + // - ray.io/cluster + if k == utils.RayNodeTypeLabelKey || k == utils.RayNodeGroupLabelKey || k == utils.RayClusterLabelKey { 
+ continue } + + labels[k] = v } return labels } func setInitContainerEnvVars(container *corev1.Container, fqdnRayIP string) { - if container.Env == nil || len(container.Env) == 0 { + if len(container.Env) == 0 { container.Env = []corev1.EnvVar{} } // Init containers in both head and worker require FQ_RAY_IP. @@ -541,10 +623,10 @@ func setInitContainerEnvVars(container *corev1.Container, fqdnRayIP string) { ) } -func setContainerEnvVars(pod *corev1.Pod, rayNodeType rayv1.RayNodeType, rayStartParams map[string]string, fqdnRayIP string, headPort string, rayStartCmd string, creatorCRDType utils.CRDType) { +func setContainerEnvVars(pod *corev1.Pod, rayNodeType rayv1.RayNodeType, fqdnRayIP string, headPort string, rayStartCmd string, creatorCRDType utils.CRDType) { // TODO: Audit all environment variables to identify which should not be modified by users. container := &pod.Spec.Containers[utils.RayContainerIndex] - if container.Env == nil || len(container.Env) == 0 { + if len(container.Env) == 0 { container.Env = []corev1.EnvVar{} } @@ -636,41 +718,14 @@ func setContainerEnvVars(pod *corev1.Pod, rayNodeType rayv1.RayNodeType, rayStar } container.Env = append(container.Env, extraTagsEnv) } - if !utils.EnvVarExists(utils.REDIS_PASSWORD, container.Env) { - // setting the REDIS_PASSWORD env var from the params - redisPasswordEnv := corev1.EnvVar{Name: utils.REDIS_PASSWORD} - if value, ok := rayStartParams["redis-password"]; ok { - redisPasswordEnv.Value = value - } - container.Env = append(container.Env, redisPasswordEnv) - } - if !utils.EnvVarExists(utils.RAY_EXTERNAL_STORAGE_NS, container.Env) { - // setting the RAY_EXTERNAL_STORAGE_NS env var from the params - if pod.Annotations != nil { - if v, ok := pod.Annotations[utils.RayExternalStorageNSAnnotationKey]; ok { - storageNS := corev1.EnvVar{Name: utils.RAY_EXTERNAL_STORAGE_NS, Value: v} - container.Env = append(container.Env, storageNS) - } - } - } - if !utils.EnvVarExists(utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, 
container.Env) && rayNodeType == rayv1.WorkerNode { - // If GCS FT is enabled and RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S is not set, set the worker's - // RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S to 600s. If the worker cannot reconnect to GCS within - // 600s, the Raylet will exit the process. By default, the value is 60s, so the head node will - // crash if the GCS server is down for more than 60s. Typically, the new GCS server will be available - // in 120 seconds, so we set the timeout to 600s to avoid the worker nodes crashing. - if ftEnabled := pod.Annotations[utils.RayFTEnabledAnnotationKey] == "true"; ftEnabled { - gcsTimeout := corev1.EnvVar{Name: utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, Value: utils.DefaultWorkerRayGcsReconnectTimeoutS} - container.Env = append(container.Env, gcsTimeout) - } - } + if !utils.EnvVarExists(utils.RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE, container.Env) { // This flag enables the display of disk usage. Without this flag, the dashboard will not show disk usage. container.Env = append(container.Env, corev1.EnvVar{Name: utils.RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE, Value: "1"}) } } -func setMissingRayStartParams(ctx context.Context, rayStartParams map[string]string, nodeType rayv1.RayNodeType, headPort string, fqdnRayIP string, annotations map[string]string) (completeStartParams map[string]string) { +func setMissingRayStartParams(ctx context.Context, rayStartParams map[string]string, nodeType rayv1.RayNodeType, headPort string, fqdnRayIP string) (completeStartParams map[string]string) { log := ctrl.LoggerFrom(ctx) // Note: The argument headPort is unused for nodeType == rayv1.HeadNode. 
if nodeType == rayv1.WorkerNode { @@ -723,6 +778,12 @@ func generateRayStartCommand(ctx context.Context, nodeType rayv1.RayNodeType, ra cpu := resource.Limits[corev1.ResourceCPU] if !cpu.IsZero() { rayStartParams["num-cpus"] = strconv.FormatInt(cpu.Value(), 10) + } else { + // Fall back to CPU request if limit is not specified + cpu := resource.Requests[corev1.ResourceCPU] + if !cpu.IsZero() { + rayStartParams["num-cpus"] = strconv.FormatInt(cpu.Value(), 10) + } } } @@ -733,15 +794,9 @@ func generateRayStartCommand(ctx context.Context, nodeType rayv1.RayNodeType, ra } } - if _, ok := rayStartParams["num-gpus"]; !ok { - // Scan for resource keys ending with "gpu" like "nvidia.com/gpu". - for resourceKey, resource := range resource.Limits { - if strings.HasSuffix(string(resourceKey), "gpu") && !resource.IsZero() { - rayStartParams["num-gpus"] = strconv.FormatInt(resource.Value(), 10) - // For now, only support one GPU type. Break on first match. - break - } - } + // Add GPU and custom accelerator resources to rayStartParams if not already present. + if err := addWellKnownAcceleratorResources(rayStartParams, resource.Limits); err != nil { + log.Error(err, "failed to add accelerator resources to rayStartParams") } rayStartCmd := "" @@ -757,12 +812,106 @@ func generateRayStartCommand(ctx context.Context, nodeType rayv1.RayNodeType, ra return rayStartCmd } +func addWellKnownAcceleratorResources(rayStartParams map[string]string, resourceLimits corev1.ResourceList) error { + if len(resourceLimits) == 0 { + return nil + } + + resourcesMap, err := getResourcesMap(rayStartParams) + if err != nil { + return fmt.Errorf("failed to get resources map from rayStartParams: %w", err) + } + + // Flag to track if any custom accelerator resource are present/added in rayStartParams resources. 
+ isCustomAcceleratorResourceAdded := isCustomAcceleratorPresentInResources(resourcesMap) + + // Create a sorted slice of resource keys + // Needed for consistent looping and adding first found custom accelerator resource to ray start params + sortedResourceKeys := getSortedResourceKeys(resourceLimits) + + for _, resourceKeyString := range sortedResourceKeys { + resourceValue := resourceLimits[corev1.ResourceName(resourceKeyString)] + + // Scan for resource keys ending with "gpu" like "nvidia.com/gpu" + if _, ok := rayStartParams["num-gpus"]; !ok { + if strings.HasSuffix(resourceKeyString, "gpu") && !resourceValue.IsZero() { + rayStartParams["num-gpus"] = strconv.FormatInt(resourceValue.Value(), 10) + } + } + + // Add the first encountered custom accelerator resource from the resource limits to the rayStartParams if not already present + if !isCustomAcceleratorResourceAdded { + if rayResourceName, ok := customAcceleratorToRayResourceMap[resourceKeyString]; ok && !resourceValue.IsZero() { + if _, exists := resourcesMap[rayResourceName]; !exists { + resourcesMap[rayResourceName] = resourceValue.AsApproximateFloat64() + + // Update the resources map in the rayStartParams + updatedResourcesStr, err := json.Marshal(resourcesMap) + if err != nil { + return fmt.Errorf("failed to marshal resources map to string: %w", err) + } + + rayStartParams["resources"] = fmt.Sprintf("'%s'", updatedResourcesStr) + } + isCustomAcceleratorResourceAdded = true + } + } + } + + return nil +} + +func isCustomAcceleratorPresentInResources(resourcesMap map[string]float64) bool { + // Check whether there exists any custom accelerator resources specified as part of rayStartParams + if len(resourcesMap) > 0 { + for _, customAcceleratorRayResource := range customAcceleratorToRayResourceMap { + if _, ok := resourcesMap[customAcceleratorRayResource]; ok { + return true + } + } + } + + return false +} + +func getResourcesMap(rayStartParams map[string]string) (map[string]float64, error) { + var 
resources map[string]float64 + if resourcesStr, ok := rayStartParams["resources"]; !ok { + resources = make(map[string]float64) + } else { + // Trim any surrounding quotes (single, double, or backticks) and spaces + resourcesStr = strings.Trim(resourcesStr, "'\"` ") + err := json.Unmarshal([]byte(resourcesStr), &resources) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal resources %w", err) + } + } + return resources, nil +} + +func getSortedResourceKeys(resourceLimits corev1.ResourceList) []string { + sortedResourceKeys := make([]string, 0, len(resourceLimits)) + for resourceKey := range resourceLimits { + sortedResourceKeys = append(sortedResourceKeys, string(resourceKey)) + } + sort.Strings(sortedResourceKeys) + return sortedResourceKeys +} + func convertParamMap(rayStartParams map[string]string) (s string) { + // Order rayStartParams keys for consistent ray start command flags generation + keys := make([]string, 0, len(rayStartParams)) + for k := range rayStartParams { + keys = append(keys, k) + } + sort.Strings(keys) + flags := new(bytes.Buffer) // specialParameterOptions' arguments can be true or false. // For example, --log-color can be auto | false | true. specialParameterOptions := []string{"log-color", "include-dashboard"} - for option, argument := range rayStartParams { + for _, option := range keys { + argument := rayStartParams[option] if utils.Contains([]string{"true", "false"}, strings.ToLower(argument)) && !utils.Contains(specialParameterOptions, option) { // booleanOptions: do not require any argument. Essentially represent boolean on-off switches. 
if strings.ToLower(argument) == "true" { @@ -781,7 +930,7 @@ func convertParamMap(rayStartParams map[string]string) (s string) { func addEmptyDir(ctx context.Context, container *corev1.Container, pod *corev1.Pod, volumeName string, volumeMountPath string, storageMedium corev1.StorageMedium) { log := ctrl.LoggerFrom(ctx) - if checkIfVolumeMounted(container, pod, volumeMountPath) { + if checkIfVolumeMounted(container, volumeMountPath) { log.Info("volume already mounted", "volume", volumeName, "path", volumeMountPath) return } @@ -826,7 +975,7 @@ func makeEmptyDirVolume(container *corev1.Container, volumeName string, storageM // Checks if the container has a volumeMount with the given mount path and if // the pod has a matching Volume. -func checkIfVolumeMounted(container *corev1.Container, pod *corev1.Pod, volumeMountPath string) bool { +func checkIfVolumeMounted(container *corev1.Container, volumeMountPath string) bool { for _, mountedVol := range container.VolumeMounts { if mountedVol.MountPath == volumeMountPath { return true @@ -845,81 +994,16 @@ func checkIfVolumeExists(pod *corev1.Pod, volumeName string) bool { return false } -func cleanupInvalidVolumeMounts(container *corev1.Container, pod *corev1.Pod) { - // if a volumeMount is specified in the container, - // but has no corresponding pod volume, it is removed - k := 0 - for _, mountedVol := range container.VolumeMounts { - for _, podVolume := range pod.Spec.Volumes { - if mountedVol.Name == podVolume.Name { - // valid mount, moving on... - container.VolumeMounts[k] = mountedVol - k++ - break - } - } - } - container.VolumeMounts = container.VolumeMounts[:k] -} - func findMemoryReqOrLimit(container corev1.Container) (res *resource.Quantity) { var mem *resource.Quantity - // check the requests, if they are not set, check the limits. - if q, ok := container.Resources.Requests[corev1.ResourceMemory]; ok { + // check the limits, if they are not set, check the requests. 
+ if q, ok := container.Resources.Limits[corev1.ResourceMemory]; ok { mem = &q return mem } - if q, ok := container.Resources.Limits[corev1.ResourceMemory]; ok { + if q, ok := container.Resources.Requests[corev1.ResourceMemory]; ok { mem = &q return mem } return nil } - -// ValidateHeadRayStartParams will validate the head node's RayStartParams. -// Return a bool indicating the validity of RayStartParams and an err with additional information. -// If isValid is true, RayStartParams are valid. Any errors will only affect performance. -// If isValid is false, RayStartParams are invalid will result in an unhealthy or failed Ray cluster. -func ValidateHeadRayStartParams(ctx context.Context, rayHeadGroupSpec rayv1.HeadGroupSpec) (isValid bool, err error) { - log := ctrl.LoggerFrom(ctx) - - // TODO (dxia): if you add more validation, please split checks into separate subroutines. - var objectStoreMemory int64 - rayStartParams := rayHeadGroupSpec.RayStartParams - // validation for the object store memory - if objectStoreMemoryStr, ok := rayStartParams[ObjectStoreMemoryKey]; ok { - objectStoreMemory, err = strconv.ParseInt(objectStoreMemoryStr, 10, 64) - if err != nil { - isValid = false - err = errors.NewBadRequest(fmt.Sprintf("Cannot parse %s %s as an integer: %s", ObjectStoreMemoryKey, objectStoreMemoryStr, err.Error())) - return - } - for _, container := range rayHeadGroupSpec.Template.Spec.Containers { - // find the ray container. - if container.Name == RayHeadContainer { - if shmSize, ok := container.Resources.Requests.Memory().AsInt64(); ok && objectStoreMemory > shmSize { - if utils.EnvVarExists(AllowSlowStorageEnvVar, container.Env) { - // in ray if this env var is set, it will only affect the performance. - isValid = true - msg := fmt.Sprintf("RayStartParams: object store memory exceeds head node container's memory request, %s:%d, memory request:%d\n"+ - "This will harm performance. 
Consider deleting files in %s or increasing head node's memory request.", ObjectStoreMemoryKey, objectStoreMemory, shmSize, SharedMemoryVolumeMountPath) - log.Info(msg) - err = errors.NewBadRequest(msg) - return - } else { - // if not set, the head node may crash and result in an unhealthy status. - isValid = false - msg := fmt.Sprintf("RayStartParams: object store memory exceeds head node container's memory request, %s:%d, memory request:%d\n"+ - "This will lead to a ValueError in Ray! Consider deleting files in %s or increasing head node's memory request.\n"+ - "To ignore this warning, set the following environment variable in headGroupSpec: %s=1", - ObjectStoreMemoryKey, objectStoreMemory, shmSize, SharedMemoryVolumeMountPath, AllowSlowStorageEnvVar) - err = errors.NewBadRequest(msg) - return - } - } - } - } - } - // default return - return true, nil -} diff --git a/ray-operator/controllers/ray/common/pod_test.go b/ray-operator/controllers/ray/common/pod_test.go index 8f271cf6165..38ff5003099 100644 --- a/ray-operator/controllers/ray/common/pod_test.go +++ b/ray-operator/controllers/ray/common/pod_test.go @@ -6,20 +6,22 @@ import ( "os" "reflect" "sort" + "strconv" "strings" "testing" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "github.com/stretchr/testify/assert" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) var testMemoryLimit = resource.MustParse("1Gi") @@ -64,13 +66,12 @@ var instance = rayv1.RayCluster{ }, WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: 
pointer.Int32(10000), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), GroupName: "small-group", RayStartParams: map[string]string{ - "port": "6379", - "num-cpus": "1", + "port": "6379", }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ @@ -191,14 +192,12 @@ var autoscalerContainer = corev1.Container{ }, }, Command: []string{ - "ray", + "/bin/bash", + "-lc", + "--", }, Args: []string{ - "kuberay-autoscaler", - "--cluster-name", - "$(RAY_CLUSTER_NAME)", - "--cluster-namespace", - "$(RAY_CLUSTER_NAMESPACE)", + "ray kuberay-autoscaler --cluster-name $(RAY_CLUSTER_NAME) --cluster-namespace $(RAY_CLUSTER_NAMESPACE)", }, Resources: corev1.ResourceRequirements{ Limits: corev1.ResourceList{ @@ -300,6 +299,357 @@ func checkContainerEnv(t *testing.T, container corev1.Container, envName string, } } +func assertWorkerGCSFaultToleranceConfig(t *testing.T, podTemplate *corev1.PodTemplateSpec, container corev1.Container) { + assert.Empty(t, podTemplate.Annotations[utils.RayExternalStorageNSAnnotationKey]) + assert.Empty(t, podTemplate.Annotations[utils.RayFTEnabledAnnotationKey]) + assert.True(t, utils.EnvVarExists(utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, container.Env)) + assert.False(t, utils.EnvVarExists(utils.RAY_EXTERNAL_STORAGE_NS, container.Env)) + assert.False(t, utils.EnvVarExists(utils.RAY_REDIS_ADDRESS, container.Env)) + assert.False(t, utils.EnvVarExists(utils.REDIS_PASSWORD, container.Env)) +} + +func TestConfigureGCSFaultToleranceWithAnnotations(t *testing.T) { + tests := []struct { + name string + storageNS string + redisUsernameEnv string + redisPasswordEnv string + redisUsernameRayStartParams string + redisPasswordRayStartParams string + isHeadPod bool + gcsFTEnabled bool + }{ + { + name: "GCS FT enabled", + gcsFTEnabled: true, + isHeadPod: true, + }, + { + name: "GCS FT enabled with external storage", + gcsFTEnabled: true, + storageNS: "test-ns", + isHeadPod: true, + }, + { + name: "GCS FT 
enabled with redis password env", + gcsFTEnabled: true, + redisPasswordEnv: "test-password", + isHeadPod: true, + }, + { + name: "GCS FT enabled with redis username and password env", + gcsFTEnabled: true, + redisUsernameEnv: "test-username", + redisPasswordEnv: "test-password", + isHeadPod: true, + }, + { + name: "GCS FT enabled with redis password ray start params", + gcsFTEnabled: true, + redisPasswordRayStartParams: "test-password", + isHeadPod: true, + }, + { + name: "GCS FT enabled with redis username and password ray start params", + gcsFTEnabled: true, + redisUsernameRayStartParams: "test-username", + redisPasswordRayStartParams: "test-password", + isHeadPod: true, + }, + { + // The most common case. + name: "GCS FT enabled with redis password env and ray start params referring to env", + gcsFTEnabled: true, + redisPasswordEnv: "test-password", + redisPasswordRayStartParams: "$REDIS_PASSWORD", + isHeadPod: false, + }, + { + name: "GCS FT enabled with redis username and password env and ray start params referring to env", + gcsFTEnabled: true, + redisUsernameEnv: "test-username", + redisUsernameRayStartParams: "$REDIS_USERNAME", + redisPasswordEnv: "test-password", + redisPasswordRayStartParams: "$REDIS_PASSWORD", + isHeadPod: false, + }, + { + name: "GCS FT enabled / worker Pod", + gcsFTEnabled: true, + isHeadPod: false, + }, + { + name: "GCS FT disabled", + gcsFTEnabled: false, + isHeadPod: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Validate the test input + if test.redisUsernameEnv != "" && test.redisUsernameRayStartParams != "" { + assert.True(t, test.redisUsernameRayStartParams == "$REDIS_USERNAME") + } + if test.redisPasswordEnv != "" && test.redisPasswordRayStartParams != "" { + assert.True(t, test.redisPasswordRayStartParams == "$REDIS_PASSWORD") + } + + // Prepare the cluster + cluster := rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + 
utils.RayFTEnabledAnnotationKey: strconv.FormatBool(test.gcsFTEnabled), + }, + }, + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: rayv1.HeadGroupSpec{ + RayStartParams: map[string]string{}, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Env: []corev1.EnvVar{ + { + Name: utils.RAY_REDIS_ADDRESS, + Value: "redis:6379", + }, + }, + }, + }, + }, + }, + }, + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ + { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Env: []corev1.EnvVar{}, + }, + }, + }, + }, + }, + }, + }, + } + if test.storageNS != "" { + cluster.Annotations[utils.RayExternalStorageNSAnnotationKey] = test.storageNS + } + if test.redisUsernameEnv != "" { + cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env = append(cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env, corev1.EnvVar{ + Name: utils.REDIS_USERNAME, + Value: test.redisUsernameEnv, + }) + } + if test.redisPasswordEnv != "" { + cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env = append(cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env, corev1.EnvVar{ + Name: utils.REDIS_PASSWORD, + Value: test.redisPasswordEnv, + }) + } + if test.redisUsernameRayStartParams != "" { + cluster.Spec.HeadGroupSpec.RayStartParams["redis-username"] = test.redisUsernameRayStartParams + } + if test.redisPasswordRayStartParams != "" { + cluster.Spec.HeadGroupSpec.RayStartParams["redis-password"] = test.redisPasswordRayStartParams + } + podTemplate := &cluster.Spec.HeadGroupSpec.Template + if !test.isHeadPod { + podTemplate = &cluster.Spec.WorkerGroupSpecs[0].Template + } + + // Configure GCS fault tolerance + if test.isHeadPod { + configureGCSFaultTolerance(podTemplate, cluster, rayv1.HeadNode) + } else { + configureGCSFaultTolerance(podTemplate, cluster, rayv1.WorkerNode) + } + + // Check configurations for GCS 
fault tolerance + container := podTemplate.Spec.Containers[utils.RayContainerIndex] + if test.isHeadPod { + assert.False(t, utils.EnvVarExists(utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, container.Env)) + assert.Equal(t, podTemplate.Annotations[utils.RayFTEnabledAnnotationKey], strconv.FormatBool(test.gcsFTEnabled)) + if test.storageNS != "" { + assert.Equal(t, podTemplate.Annotations[utils.RayExternalStorageNSAnnotationKey], test.storageNS) + assert.True(t, utils.EnvVarExists(utils.RAY_EXTERNAL_STORAGE_NS, container.Env)) + } + if test.redisUsernameEnv != "" { + env := getEnvVar(container, utils.REDIS_USERNAME) + assert.Equal(t, env.Value, test.redisUsernameEnv) + } + if test.redisPasswordEnv != "" { + env := getEnvVar(container, utils.REDIS_PASSWORD) + assert.Equal(t, env.Value, test.redisPasswordEnv) + } else if test.redisPasswordRayStartParams != "" { + env := getEnvVar(container, utils.REDIS_PASSWORD) + assert.Equal(t, env.Value, test.redisPasswordRayStartParams) + } + } else { + assertWorkerGCSFaultToleranceConfig(t, podTemplate, container) + } + }) + } +} + +func TestConfigureGCSFaultToleranceWithGcsFTOptions(t *testing.T) { + tests := []struct { + gcsFTOptions *rayv1.GcsFaultToleranceOptions + name string + isHeadPod bool + }{ + { + name: "GCS FT enabled", + gcsFTOptions: &rayv1.GcsFaultToleranceOptions{ + RedisAddress: "redis:6379", + }, + isHeadPod: true, + }, + { + name: "GCS FT enabled with redis password", + gcsFTOptions: &rayv1.GcsFaultToleranceOptions{ + RedisAddress: "redis:6379", + RedisPassword: &rayv1.RedisCredential{ + Value: "test-password", + }, + }, + isHeadPod: true, + }, + { + name: "GCS FT enabled with redis username and password", + gcsFTOptions: &rayv1.GcsFaultToleranceOptions{ + RedisAddress: "redis:6379", + RedisUsername: &rayv1.RedisCredential{ + Value: "test-username", + }, + RedisPassword: &rayv1.RedisCredential{ + Value: "test-password", + }, + }, + isHeadPod: true, + }, + { + name: "GCS FT enabled with redis password in secret", 
+ gcsFTOptions: &rayv1.GcsFaultToleranceOptions{ + RedisAddress: "redis:6379", + RedisPassword: &rayv1.RedisCredential{ + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "spec.redisPassword", + }, + }, + }, + }, + isHeadPod: true, + }, + { + name: "GCS FT enabled with redis username and password in secret", + gcsFTOptions: &rayv1.GcsFaultToleranceOptions{ + RedisAddress: "redis:6379", + RedisUsername: &rayv1.RedisCredential{ + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "spec.redisUsername", + }, + }, + }, + RedisPassword: &rayv1.RedisCredential{ + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "spec.redisPassword", + }, + }, + }, + }, + isHeadPod: true, + }, + { + name: "GCS FT enabled with redis external storage namespace", + gcsFTOptions: &rayv1.GcsFaultToleranceOptions{ + RedisAddress: "redis:6379", + ExternalStorageNamespace: "test-ns", + }, + isHeadPod: true, + }, + { + name: "GCS FT enabled / worker Pod", + gcsFTOptions: &rayv1.GcsFaultToleranceOptions{ + RedisAddress: "redis:6379", + }, + isHeadPod: false, + }, + } + + emptyPodTemplate := corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Env: []corev1.EnvVar{}, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Prepare the cluster + cluster := rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + GcsFaultToleranceOptions: test.gcsFTOptions, + HeadGroupSpec: rayv1.HeadGroupSpec{ + RayStartParams: map[string]string{}, + Template: *emptyPodTemplate.DeepCopy(), + }, + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ + { + Template: *emptyPodTemplate.DeepCopy(), + }, + }, + }, + } + + podTemplate := &cluster.Spec.HeadGroupSpec.Template + nodeType := rayv1.HeadNode + if !test.isHeadPod { + podTemplate = &cluster.Spec.WorkerGroupSpecs[0].Template + nodeType = rayv1.WorkerNode + } + configureGCSFaultTolerance(podTemplate, 
cluster, nodeType) + container := podTemplate.Spec.Containers[utils.RayContainerIndex] + + if test.isHeadPod { + assert.False(t, utils.EnvVarExists(utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, container.Env)) + assert.Equal(t, podTemplate.Annotations[utils.RayFTEnabledAnnotationKey], strconv.FormatBool(test.gcsFTOptions != nil)) + + env := getEnvVar(container, utils.RAY_REDIS_ADDRESS) + assert.Equal(t, env.Value, "redis:6379") + + if test.gcsFTOptions.RedisUsername != nil { + env := getEnvVar(container, utils.REDIS_USERNAME) + assert.Equal(t, env.Value, test.gcsFTOptions.RedisUsername.Value) + assert.Equal(t, env.ValueFrom, test.gcsFTOptions.RedisUsername.ValueFrom) + } + + if test.gcsFTOptions.RedisPassword != nil { + env := getEnvVar(container, utils.REDIS_PASSWORD) + assert.Equal(t, env.Value, test.gcsFTOptions.RedisPassword.Value) + assert.Equal(t, env.ValueFrom, test.gcsFTOptions.RedisPassword.ValueFrom) + } + if test.gcsFTOptions.ExternalStorageNamespace != "" { + assert.Equal(t, podTemplate.Annotations[utils.RayExternalStorageNSAnnotationKey], test.gcsFTOptions.ExternalStorageNamespace) + env := getEnvVar(container, utils.RAY_EXTERNAL_STORAGE_NS) + assert.Equal(t, env.Value, test.gcsFTOptions.ExternalStorageNamespace) + } + } else { + assertWorkerGCSFaultToleranceConfig(t, podTemplate, container) + } + }) + } +} + func TestBuildPod(t *testing.T) { cluster := instance.DeepCopy() ctx := context.Background() @@ -307,7 +657,7 @@ func TestBuildPod(t *testing.T) { // Test head pod podName := strings.ToLower(cluster.Name + utils.DashSymbol + string(rayv1.HeadNode) + utils.DashSymbol + utils.FormatInt32(0)) podTemplateSpec := DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") - pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", nil, utils.GetCRDType(""), "") + pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", false, 
utils.GetCRDType(""), "") // Check environment variables rayContainer := pod.Spec.Containers[utils.RayContainerIndex] @@ -362,7 +712,18 @@ func TestBuildPod(t *testing.T) { podName = cluster.Name + utils.DashSymbol + string(rayv1.WorkerNode) + utils.DashSymbol + worker.GroupName + utils.DashSymbol + utils.FormatInt32(0) fqdnRayIP := utils.GenerateFQDNServiceName(ctx, *cluster, cluster.Namespace) podTemplateSpec = DefaultWorkerPodTemplate(ctx, *cluster, worker, podName, fqdnRayIP, "6379") - pod = BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, "6379", nil, utils.GetCRDType(""), fqdnRayIP) + pod = BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, "6379", false, utils.GetCRDType(""), fqdnRayIP) + + // Check resources + rayContainer = pod.Spec.Containers[utils.RayContainerIndex] + expectedResources := corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: testMemoryLimit, + "nvidia.com/gpu": resource.MustParse("3"), + }, + } + assert.Equal(t, expectedResources.Limits, rayContainer.Resources.Limits, "Resource limits do not match") // Check environment variables rayContainer = pod.Spec.Containers[utils.RayContainerIndex] @@ -386,6 +747,54 @@ func TestBuildPod(t *testing.T) { checkContainerEnv(t, rayContainer, "TEST_ENV_NAME", "TEST_ENV_VALUE") } +func TestBuildPod_WithNoCPULimits(t *testing.T) { + cluster := instance.DeepCopy() + ctx := context.Background() + + cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Resources = corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: testMemoryLimit, + }, + Limits: corev1.ResourceList{ + corev1.ResourceMemory: testMemoryLimit, + }, + } + cluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[utils.RayContainerIndex].Resources = corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + 
corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: testMemoryLimit, + }, + + Limits: corev1.ResourceList{ + corev1.ResourceMemory: testMemoryLimit, + "nvidia.com/gpu": resource.MustParse("3"), + }, + } + + // Test head pod + podName := strings.ToLower(cluster.Name + utils.DashSymbol + string(rayv1.HeadNode) + utils.DashSymbol + utils.FormatInt32(0)) + podTemplateSpec := DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") + pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", false, utils.GetCRDType(""), "") + expectedCommandArg := splitAndSort("ulimit -n 65536; ray start --head --block --dashboard-agent-listen-port=52365 --memory=1073741824 --num-cpus=2 --metrics-export-port=8080 --dashboard-host=0.0.0.0") + actualCommandArg := splitAndSort(pod.Spec.Containers[0].Args[0]) + if !reflect.DeepEqual(expectedCommandArg, actualCommandArg) { + t.Fatalf("Expected `%v` but got `%v`", expectedCommandArg, actualCommandArg) + } + + // testing worker pod + worker := cluster.Spec.WorkerGroupSpecs[0] + podName = cluster.Name + utils.DashSymbol + string(rayv1.WorkerNode) + utils.DashSymbol + worker.GroupName + utils.DashSymbol + utils.FormatInt32(0) + fqdnRayIP := utils.GenerateFQDNServiceName(ctx, *cluster, cluster.Namespace) + podTemplateSpec = DefaultWorkerPodTemplate(ctx, *cluster, worker, podName, fqdnRayIP, "6379") + pod = BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, "6379", false, utils.GetCRDType(""), fqdnRayIP) + expectedCommandArg = splitAndSort("ulimit -n 65536; ray start --block --dashboard-agent-listen-port=52365 --memory=1073741824 --num-cpus=2 --num-gpus=3 --address=raycluster-sample-head-svc.default.svc.cluster.local:6379 --port=6379 --metrics-export-port=8080") + actualCommandArg = splitAndSort(pod.Spec.Containers[0].Args[0]) + if !reflect.DeepEqual(expectedCommandArg, actualCommandArg) { + t.Fatalf("Expected `%v` but got `%v`", 
expectedCommandArg, actualCommandArg) + } +} + func TestBuildPod_WithOverwriteCommand(t *testing.T) { ctx := context.Background() @@ -402,7 +811,7 @@ func TestBuildPod_WithOverwriteCommand(t *testing.T) { podName := strings.ToLower(cluster.Name + utils.DashSymbol + string(rayv1.HeadNode) + utils.DashSymbol + utils.FormatInt32(0)) podTemplateSpec := DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") - headPod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", nil, utils.GetCRDType(""), "") + headPod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", false, utils.GetCRDType(""), "") headContainer := headPod.Spec.Containers[utils.RayContainerIndex] assert.Equal(t, headContainer.Command, []string{"I am head"}) assert.Equal(t, headContainer.Args, []string{"I am head again"}) @@ -411,7 +820,7 @@ func TestBuildPod_WithOverwriteCommand(t *testing.T) { podName = cluster.Name + utils.DashSymbol + string(rayv1.WorkerNode) + utils.DashSymbol + worker.GroupName + utils.DashSymbol + utils.FormatInt32(0) fqdnRayIP := utils.GenerateFQDNServiceName(ctx, *cluster, cluster.Namespace) podTemplateSpec = DefaultWorkerPodTemplate(ctx, *cluster, worker, podName, fqdnRayIP, "6379") - workerPod := BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, "6379", nil, utils.GetCRDType(""), fqdnRayIP) + workerPod := BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, "6379", false, utils.GetCRDType(""), fqdnRayIP) workerContainer := workerPod.Spec.Containers[utils.RayContainerIndex] assert.Equal(t, workerContainer.Command, []string{"I am worker"}) assert.Equal(t, workerContainer.Args, []string{"I am worker again"}) @@ -423,7 +832,7 @@ func TestBuildPod_WithAutoscalerEnabled(t *testing.T) { cluster.Spec.EnableInTreeAutoscaling = &trueFlag podName := strings.ToLower(cluster.Name + utils.DashSymbol + string(rayv1.HeadNode) + 
utils.DashSymbol + utils.FormatInt32(0)) podTemplateSpec := DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") - pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", &trueFlag, utils.GetCRDType(""), "") + pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", true, utils.GetCRDType(""), "") actualResult := pod.Labels[utils.RayClusterLabelKey] expectedResult := cluster.Name @@ -480,7 +889,7 @@ func TestBuildPod_WithCreatedByRayService(t *testing.T) { cluster.Spec.EnableInTreeAutoscaling = &trueFlag podName := strings.ToLower(cluster.Name + utils.DashSymbol + string(rayv1.HeadNode) + utils.DashSymbol + utils.FormatInt32(0)) podTemplateSpec := DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") - pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", &trueFlag, utils.RayServiceCRD, "") + pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", true, utils.RayServiceCRD, "") val, ok := pod.Labels[utils.RayClusterServingServiceLabelKey] assert.True(t, ok, "Expected serve label is not present") @@ -491,7 +900,7 @@ func TestBuildPod_WithCreatedByRayService(t *testing.T) { podName = cluster.Name + utils.DashSymbol + string(rayv1.WorkerNode) + utils.DashSymbol + worker.GroupName + utils.DashSymbol + utils.FormatInt32(0) fqdnRayIP := utils.GenerateFQDNServiceName(ctx, *cluster, cluster.Namespace) podTemplateSpec = DefaultWorkerPodTemplate(ctx, *cluster, worker, podName, fqdnRayIP, "6379") - pod = BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, "6379", nil, utils.RayServiceCRD, fqdnRayIP) + pod = BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, "6379", false, utils.RayServiceCRD, fqdnRayIP) val, ok = pod.Labels[utils.RayClusterServingServiceLabelKey] assert.True(t, ok, 
"Expected serve label is not present") @@ -499,76 +908,6 @@ func TestBuildPod_WithCreatedByRayService(t *testing.T) { utils.EnvVarExists(utils.RAY_TIMEOUT_MS_TASK_WAIT_FOR_DEATH_INFO, pod.Spec.Containers[utils.RayContainerIndex].Env) } -func TestBuildPod_WithGcsFtEnabled(t *testing.T) { - ctx := context.Background() - // Test 1 - cluster := instance.DeepCopy() - cluster.Annotations = map[string]string{ - utils.RayFTEnabledAnnotationKey: "true", - } - - // Build a head Pod. - podName := strings.ToLower(cluster.Name + utils.DashSymbol + string(rayv1.HeadNode) + utils.DashSymbol + utils.FormatInt32(0)) - podTemplateSpec := DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") - pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", nil, utils.GetCRDType(""), "") - - // Check environment variable "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S" - rayContainer := pod.Spec.Containers[utils.RayContainerIndex] - - // "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S" should not be set on the head Pod by default - assert.True(t, !utils.EnvVarExists(utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, rayContainer.Env)) - - // Test 2 - cluster = instance.DeepCopy() - cluster.Annotations = map[string]string{ - utils.RayFTEnabledAnnotationKey: "true", - } - - // Add "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S" env var in the head group spec. 
- cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env = append(cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env, - corev1.EnvVar{Name: utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, Value: "60"}) - podTemplateSpec = DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") - pod = BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", nil, utils.GetCRDType(""), "") - rayContainer = pod.Spec.Containers[utils.RayContainerIndex] - - // Check environment variable "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S" - checkContainerEnv(t, rayContainer, utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, "60") - - // Test 3 - cluster = instance.DeepCopy() - cluster.Annotations = map[string]string{ - utils.RayFTEnabledAnnotationKey: "true", - } - - // Build a worker pod - worker := cluster.Spec.WorkerGroupSpecs[0] - podName = cluster.Name + utils.DashSymbol + string(rayv1.WorkerNode) + utils.DashSymbol + worker.GroupName + utils.DashSymbol + utils.FormatInt32(0) - fqdnRayIP := utils.GenerateFQDNServiceName(ctx, *cluster, cluster.Namespace) - podTemplateSpec = DefaultWorkerPodTemplate(ctx, *cluster, worker, podName, fqdnRayIP, "6379") - pod = BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, "6379", nil, utils.GetCRDType(""), fqdnRayIP) - - // Check the default value of "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S" - rayContainer = pod.Spec.Containers[utils.RayContainerIndex] - checkContainerEnv(t, rayContainer, utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, utils.DefaultWorkerRayGcsReconnectTimeoutS) - - // Test 4 - cluster = instance.DeepCopy() - cluster.Annotations = map[string]string{ - utils.RayFTEnabledAnnotationKey: "true", - } - - // Add "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S" env var in the worker group spec. 
- cluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[utils.RayContainerIndex].Env = append(cluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[utils.RayContainerIndex].Env, - corev1.EnvVar{Name: utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, Value: "120"}) - worker = cluster.Spec.WorkerGroupSpecs[0] - podTemplateSpec = DefaultWorkerPodTemplate(ctx, *cluster, worker, podName, fqdnRayIP, "6379") - pod = BuildPod(ctx, podTemplateSpec, rayv1.WorkerNode, worker.RayStartParams, "6379", nil, utils.GetCRDType(""), fqdnRayIP) - - // Check the default value of "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S" - rayContainer = pod.Spec.Containers[utils.RayContainerIndex] - checkContainerEnv(t, rayContainer, utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, "120") -} - // Check that autoscaler container overrides work as expected. func TestBuildPodWithAutoscalerOptions(t *testing.T) { ctx := context.Background() @@ -633,7 +972,7 @@ func TestBuildPodWithAutoscalerOptions(t *testing.T) { SecurityContext: &customSecurityContext, } podTemplateSpec := DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") - pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", &trueFlag, utils.GetCRDType(""), "") + pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", true, utils.GetCRDType(""), "") expectedContainer := *autoscalerContainer.DeepCopy() expectedContainer.Image = customAutoscalerImage expectedContainer.ImagePullPolicy = customPullPolicy @@ -758,36 +1097,6 @@ func TestHeadPodTemplate_WithServiceAccount(t *testing.T) { } } -func TestValidateHeadRayStartParams_OK(t *testing.T) { - input := instance.Spec.HeadGroupSpec.DeepCopy() - input.RayStartParams = map[string]string{"include-dashboard": "true"} - isValid, err := ValidateHeadRayStartParams(context.Background(), *input) - assert.Equal(t, true, isValid) - assert.Nil(t, err) - command := 
convertParamMap(input.RayStartParams) - assert.True(t, strings.Contains(command, "--include-dashboard=true")) -} - -func TestValidateHeadRayStartParams_ValidWithObjectStoreMemoryError(t *testing.T) { - input := instance.Spec.HeadGroupSpec.DeepCopy() - input.RayStartParams[ObjectStoreMemoryKey] = "2000000000" - input.Template.Spec.Containers[0].Env = append(input.Template.Spec.Containers[0].Env, corev1.EnvVar{ - Name: AllowSlowStorageEnvVar, - Value: "1", - }) - isValid, err := ValidateHeadRayStartParams(context.Background(), *input) - assert.Equal(t, true, isValid) - assert.True(t, errors.IsBadRequest(err)) -} - -func TestValidateHeadRayStartParams_InvalidObjectStoreMemory(t *testing.T) { - input := instance.Spec.HeadGroupSpec.DeepCopy() - input.RayStartParams[ObjectStoreMemoryKey] = "2000000000" - isValid, err := ValidateHeadRayStartParams(context.Background(), *input) - assert.Equal(t, false, isValid) - assert.True(t, errors.IsBadRequest(err)) -} - func splitAndSort(s string) []string { strs := strings.Split(s, " ") result := make([]string, 0, len(strs)) @@ -800,32 +1109,6 @@ func splitAndSort(s string) []string { return result } -func TestCleanupInvalidVolumeMounts(t *testing.T) { - ctx := context.Background() - cluster := instance.DeepCopy() - - // Test head pod - podName := strings.ToLower(cluster.Name + utils.DashSymbol + string(rayv1.HeadNode) + utils.DashSymbol + utils.FormatInt32(0)) - podTemplateSpec := DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") - pod := BuildPod(ctx, podTemplateSpec, rayv1.HeadNode, cluster.Spec.HeadGroupSpec.RayStartParams, "6379", nil, utils.GetCRDType(""), "") - - pod.Spec.Containers[0].VolumeMounts = append(pod.Spec.Containers[0].VolumeMounts, []corev1.VolumeMount{ - { - Name: "mock-name1", - MountPath: "/mock-path1", - ReadOnly: true, - }, - { - Name: "mock-name2", - MountPath: "/mock-path2", - ReadOnly: true, - }, - }...) 
- assert.Equal(t, len(pod.Spec.Containers[0].VolumeMounts), 3) - cleanupInvalidVolumeMounts(&pod.Spec.Containers[0], &pod) - assert.Equal(t, len(pod.Spec.Containers[0].VolumeMounts), 1) -} - func TestDefaultWorkerPodTemplateWithName(t *testing.T) { ctx := context.Background() @@ -842,7 +1125,8 @@ func TestDefaultWorkerPodTemplateWithName(t *testing.T) { assert.Equal(t, worker, expectedWorker) } -func containerPortExists(ports []corev1.ContainerPort, name string, containerPort int32) error { +func containerPortExists(ports []corev1.ContainerPort, containerPort int32) error { + name := utils.MetricsPortName for _, port := range ports { if port.Name == name { if port.ContainerPort != containerPort { @@ -863,7 +1147,7 @@ func TestDefaultHeadPodTemplateWithConfigurablePorts(t *testing.T) { podTemplateSpec := DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") // DefaultHeadPodTemplate will add the default metrics port if user doesn't specify it. // Verify the default metrics port exists. - if err := containerPortExists(podTemplateSpec.Spec.Containers[0].Ports, utils.MetricsPortName, int32(utils.DefaultMetricsPort)); err != nil { + if err := containerPortExists(podTemplateSpec.Spec.Containers[0].Ports, int32(utils.DefaultMetricsPort)); err != nil { t.Fatal(err) } customMetricsPort := int32(utils.DefaultMetricsPort) + 1 @@ -874,7 +1158,7 @@ func TestDefaultHeadPodTemplateWithConfigurablePorts(t *testing.T) { cluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Ports = []corev1.ContainerPort{metricsPort} podTemplateSpec = DefaultHeadPodTemplate(ctx, *cluster, cluster.Spec.HeadGroupSpec, podName, "6379") // Verify the custom metrics port exists. 
- if err := containerPortExists(podTemplateSpec.Spec.Containers[0].Ports, utils.MetricsPortName, customMetricsPort); err != nil { + if err := containerPortExists(podTemplateSpec.Spec.Containers[0].Ports, customMetricsPort); err != nil { t.Fatal(err) } } @@ -890,7 +1174,7 @@ func TestDefaultWorkerPodTemplateWithConfigurablePorts(t *testing.T) { podTemplateSpec := DefaultWorkerPodTemplate(ctx, *cluster, worker, podName, fqdnRayIP, "6379") // DefaultWorkerPodTemplate will add the default metrics port if user doesn't specify it. // Verify the default metrics port exists. - if err := containerPortExists(podTemplateSpec.Spec.Containers[0].Ports, utils.MetricsPortName, int32(utils.DefaultMetricsPort)); err != nil { + if err := containerPortExists(podTemplateSpec.Spec.Containers[0].Ports, int32(utils.DefaultMetricsPort)); err != nil { t.Fatal(err) } customMetricsPort := int32(utils.DefaultMetricsPort) + 1 @@ -901,7 +1185,7 @@ func TestDefaultWorkerPodTemplateWithConfigurablePorts(t *testing.T) { cluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Ports = []corev1.ContainerPort{metricsPort} podTemplateSpec = DefaultWorkerPodTemplate(ctx, *cluster, worker, podName, fqdnRayIP, "6379") // Verify the custom metrics port exists. - if err := containerPortExists(podTemplateSpec.Spec.Containers[0].Ports, utils.MetricsPortName, customMetricsPort); err != nil { + if err := containerPortExists(podTemplateSpec.Spec.Containers[0].Ports, customMetricsPort); err != nil { t.Fatal(err) } } @@ -995,23 +1279,23 @@ func TestSetMissingRayStartParamsAddress(t *testing.T) { // Case 1: Head node with no address option set. 
rayStartParams := map[string]string{} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "", nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "") assert.NotContains(t, rayStartParams, "address", "Head node should not have an address option set by default.") // Case 2: Head node with custom address option set. rayStartParams = map[string]string{"address": customAddress} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "", nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "") assert.Equal(t, customAddress, rayStartParams["address"], fmt.Sprintf("Expected `%v` but got `%v`", customAddress, rayStartParams["address"])) // Case 3: Worker node with no address option set. rayStartParams = map[string]string{} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP) expectedAddress := fmt.Sprintf("%s:%s", fqdnRayIP, headPort) assert.Equal(t, expectedAddress, rayStartParams["address"], fmt.Sprintf("Expected `%v` but got `%v`", expectedAddress, rayStartParams["address"])) // Case 4: Worker node with custom address option set. rayStartParams = map[string]string{"address": customAddress} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP) assert.Equal(t, customAddress, rayStartParams["address"], fmt.Sprintf("Expected `%v` but got `%v`", customAddress, rayStartParams["address"])) } @@ -1028,22 +1312,22 @@ func TestSetMissingRayStartParamsMetricsExportPort(t *testing.T) { // Case 1: Head node with no metrics-export-port option set. 
rayStartParams := map[string]string{} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "", nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "") assert.Equal(t, fmt.Sprint(utils.DefaultMetricsPort), rayStartParams["metrics-export-port"], fmt.Sprintf("Expected `%v` but got `%v`", fmt.Sprint(utils.DefaultMetricsPort), rayStartParams["metrics-export-port"])) // Case 2: Head node with custom metrics-export-port option set. rayStartParams = map[string]string{"metrics-export-port": fmt.Sprint(customMetricsPort)} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "", nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "") assert.Equal(t, fmt.Sprint(customMetricsPort), rayStartParams["metrics-export-port"], fmt.Sprintf("Expected `%v` but got `%v`", fmt.Sprint(customMetricsPort), rayStartParams["metrics-export-port"])) // Case 3: Worker node with no metrics-export-port option set. rayStartParams = map[string]string{} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP) assert.Equal(t, fmt.Sprint(utils.DefaultMetricsPort), rayStartParams["metrics-export-port"], fmt.Sprintf("Expected `%v` but got `%v`", fmt.Sprint(utils.DefaultMetricsPort), rayStartParams["metrics-export-port"])) // Case 4: Worker node with custom metrics-export-port option set. 
rayStartParams = map[string]string{"metrics-export-port": fmt.Sprint(customMetricsPort)} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP) assert.Equal(t, fmt.Sprint(customMetricsPort), rayStartParams["metrics-export-port"], fmt.Sprintf("Expected `%v` but got `%v`", fmt.Sprint(customMetricsPort), rayStartParams["metrics-export-port"])) } @@ -1059,22 +1343,22 @@ func TestSetMissingRayStartParamsBlock(t *testing.T) { // Case 1: Head node with no --block option set. rayStartParams := map[string]string{} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "", nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "") assert.Equal(t, "true", rayStartParams["block"], fmt.Sprintf("Expected `%v` but got `%v`", "true", rayStartParams["block"])) // Case 2: Head node with --block option set to false. rayStartParams = map[string]string{"block": "false"} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "", nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "") assert.Equal(t, "true", rayStartParams["block"], fmt.Sprintf("Expected `%v` but got `%v`", "false", rayStartParams["block"])) // Case 3: Worker node with no --block option set. rayStartParams = map[string]string{} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP) assert.Equal(t, "true", rayStartParams["block"], fmt.Sprintf("Expected `%v` but got `%v`", "true", rayStartParams["block"])) // Case 4: Worker node with --block option set to false. 
rayStartParams = map[string]string{"block": "false"} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP) assert.Equal(t, "true", rayStartParams["block"], fmt.Sprintf("Expected `%v` but got `%v`", "false", rayStartParams["block"])) } @@ -1088,23 +1372,23 @@ func TestSetMissingRayStartParamsDashboardHost(t *testing.T) { // Case 1: Head node with no dashboard-host option set. rayStartParams := map[string]string{} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "", nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "") assert.Equal(t, "0.0.0.0", rayStartParams["dashboard-host"], fmt.Sprintf("Expected `%v` but got `%v`", "0.0.0.0", rayStartParams["dashboard-host"])) // Case 2: Head node with dashboard-host option set. rayStartParams = map[string]string{"dashboard-host": "localhost"} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "", nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.HeadNode, headPort, "") assert.Equal(t, "localhost", rayStartParams["dashboard-host"], fmt.Sprintf("Expected `%v` but got `%v`", "localhost", rayStartParams["dashboard-host"])) // Case 3: Worker node with no dashboard-host option set. rayStartParams = map[string]string{} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP) assert.NotContains(t, rayStartParams, "dashboard-host", "workers should not have an dashboard-host option set.") // Case 4: Worker node with dashboard-host option set. // To maximize user empowerment, this option can be enabled. 
However, it is important to note that the dashboard is not available on worker nodes. rayStartParams = map[string]string{"dashboard-host": "localhost"} - rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP, nil) + rayStartParams = setMissingRayStartParams(ctx, rayStartParams, rayv1.WorkerNode, headPort, fqdnRayIP) assert.Equal(t, "localhost", rayStartParams["dashboard-host"], fmt.Sprintf("Expected `%v` but got `%v`", "localhost", rayStartParams["dashboard-host"])) } @@ -1184,7 +1468,7 @@ func TestInitLivenessAndReadinessProbe(t *testing.T) { assert.Nil(t, rayContainer.LivenessProbe.Exec) assert.Nil(t, rayContainer.ReadinessProbe.Exec) - // Test 2: User does not define a custom probe. KubeRay will inject Exec probe. + // Test 2: User does not define a custom probe. KubeRay will inject Exec probe for worker pod. // Here we test the case where the Ray Pod originates from RayServiceCRD, // implying that an additional serve health check will be added to the readiness probe. rayContainer.LivenessProbe = nil @@ -1194,4 +1478,155 @@ func TestInitLivenessAndReadinessProbe(t *testing.T) { assert.NotNil(t, rayContainer.ReadinessProbe.Exec) assert.False(t, strings.Contains(strings.Join(rayContainer.LivenessProbe.Exec.Command, " "), utils.RayServeProxyHealthPath)) assert.True(t, strings.Contains(strings.Join(rayContainer.ReadinessProbe.Exec.Command, " "), utils.RayServeProxyHealthPath)) + assert.Equal(t, int32(2), rayContainer.LivenessProbe.TimeoutSeconds) + assert.Equal(t, int32(2), rayContainer.ReadinessProbe.TimeoutSeconds) + + // Test 3: User does not define a custom probe. KubeRay will inject Exec probe for head pod. + // Here we test the case where the Ray Pod originates from RayServiceCRD, + // implying that an additional serve health check will be added to the readiness probe. 
+ rayContainer.LivenessProbe = nil + rayContainer.ReadinessProbe = nil + initLivenessAndReadinessProbe(rayContainer, rayv1.HeadNode, utils.RayServiceCRD) + assert.NotNil(t, rayContainer.LivenessProbe.Exec) + assert.NotNil(t, rayContainer.ReadinessProbe.Exec) + // head pod should not have Ray Serve proxy health probes + assert.False(t, strings.Contains(strings.Join(rayContainer.LivenessProbe.Exec.Command, " "), utils.RayServeProxyHealthPath)) + assert.False(t, strings.Contains(strings.Join(rayContainer.ReadinessProbe.Exec.Command, " "), utils.RayServeProxyHealthPath)) + assert.Equal(t, int32(5), rayContainer.LivenessProbe.TimeoutSeconds) + assert.Equal(t, int32(5), rayContainer.ReadinessProbe.TimeoutSeconds) +} + +func TestGenerateRayStartCommand(t *testing.T) { + tests := []struct { + rayStartParams map[string]string + name string + expected string + nodeType rayv1.RayNodeType + resource corev1.ResourceRequirements + }{ + { + name: "WorkerNode with GPU", + nodeType: rayv1.WorkerNode, + rayStartParams: map[string]string{}, + resource: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("1"), + }, + }, + expected: "ray start --num-gpus=1 ", + }, + { + name: "WorkerNode with TPU", + nodeType: rayv1.WorkerNode, + rayStartParams: map[string]string{}, + resource: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "google.com/tpu": resource.MustParse("4"), + }, + }, + expected: `ray start --resources='{"TPU":4}' `, + }, + { + name: "HeadNode with Neuron Cores", + nodeType: rayv1.HeadNode, + rayStartParams: map[string]string{}, + resource: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "aws.amazon.com/neuroncore": resource.MustParse("4"), + }, + }, + expected: `ray start --head --resources='{"neuron_cores":4}' `, + }, + { + name: "HeadNode with multiple accelerators", + nodeType: rayv1.HeadNode, + rayStartParams: map[string]string{}, + resource: corev1.ResourceRequirements{ + Limits: 
corev1.ResourceList{ + "aws.amazon.com/neuroncore": resource.MustParse("4"), + "nvidia.com/gpu": resource.MustParse("1"), + }, + }, + expected: `ray start --head --num-gpus=1 --resources='{"neuron_cores":4}' `, + }, + { + name: "HeadNode with multiple custom accelerators", + nodeType: rayv1.HeadNode, + rayStartParams: map[string]string{}, + resource: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "google.com/tpu": resource.MustParse("8"), + "aws.amazon.com/neuroncore": resource.MustParse("4"), + "nvidia.com/gpu": resource.MustParse("1"), + }, + }, + expected: `ray start --head --num-gpus=1 --resources='{"neuron_cores":4}' `, + }, + { + name: "HeadNode with existing resources", + nodeType: rayv1.HeadNode, + rayStartParams: map[string]string{ + "resources": `"{"custom_resource":2}"`, + }, + resource: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "aws.amazon.com/neuroncore": resource.MustParse("4"), + }, + }, + expected: `ray start --head --resources='{"custom_resource":2,"neuron_cores":4}' `, + }, + { + name: "HeadNode with existing neuron_cores resources", + nodeType: rayv1.HeadNode, + rayStartParams: map[string]string{ + "resources": `'{"custom_resource":2,"neuron_cores":3}'`, + }, + resource: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "aws.amazon.com/neuroncore": resource.MustParse("4"), + }, + }, + expected: `ray start --head --resources='{"custom_resource":2,"neuron_cores":3}' `, + }, + { + name: "HeadNode with existing TPU resources", + nodeType: rayv1.HeadNode, + rayStartParams: map[string]string{ + "resources": `'{"custom_resource":2,"TPU":4}'`, + }, + resource: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "google.com/tpu": resource.MustParse("8"), + }, + }, + expected: `ray start --head --resources='{"custom_resource":2,"TPU":4}' `, + }, + { + name: "HeadNode with invalid resources string", + nodeType: rayv1.HeadNode, + rayStartParams: map[string]string{ + "resources": "{", + }, + resource: 
corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "aws.amazon.com/neuroncore": resource.MustParse("4"), + }, + }, + expected: "ray start --head --resources={ ", + }, + { + name: "Invalid node type", + nodeType: "InvalidType", + rayStartParams: map[string]string{}, + resource: corev1.ResourceRequirements{}, + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := generateRayStartCommand(context.TODO(), tt.nodeType, tt.rayStartParams, tt.resource) + assert.Equal(t, tt.expected, result) + }) + } } diff --git a/ray-operator/controllers/ray/common/rbac.go b/ray-operator/controllers/ray/common/rbac.go index 45457641861..2b2fba768d7 100644 --- a/ray-operator/controllers/ray/common/rbac.go +++ b/ray-operator/controllers/ray/common/rbac.go @@ -1,11 +1,12 @@ package common import ( - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) // BuildServiceAccount creates a new ServiceAccount for a head pod with autoscaler. 
@@ -41,7 +42,7 @@ func BuildRole(cluster *rayv1.RayCluster) (*rbacv1.Role, error) { { APIGroups: []string{""}, Resources: []string{"pods"}, - Verbs: []string{"get", "list", "watch", "patch"}, + Verbs: []string{"get", "list", "watch"}, }, { APIGroups: []string{"ray.io"}, diff --git a/ray-operator/controllers/ray/common/route.go b/ray-operator/controllers/ray/common/route.go index eb3a09c2517..14e41236d75 100644 --- a/ray-operator/controllers/ray/common/route.go +++ b/ray-operator/controllers/ray/common/route.go @@ -4,9 +4,10 @@ import ( "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" routev1 "github.com/openshift/api/route/v1" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) // BuildRouteForHeadService Builds the Route (OpenShift) for head service dashboard. diff --git a/ray-operator/controllers/ray/common/route_test.go b/ray-operator/controllers/ray/common/route_test.go index 1d3956d9f93..7658fd769f2 100644 --- a/ray-operator/controllers/ray/common/route_test.go +++ b/ray-operator/controllers/ray/common/route_test.go @@ -4,12 +4,13 @@ import ( "strings" "testing" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) var instanceWithRouteEnabled = &rayv1.RayCluster{ @@ -22,7 +23,7 @@ var instanceWithRouteEnabled = &rayv1.RayCluster{ }, Spec: rayv1.RayClusterSpec{ HeadGroupSpec: rayv1.HeadGroupSpec{ - EnableIngress: pointer.Bool(true), + EnableIngress: ptr.To(true), Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ Containers: []corev1.Container{ diff --git 
a/ray-operator/controllers/ray/common/service.go b/ray-operator/controllers/ray/common/service.go index 7ffb8f58eeb..0c2ae6ecaa6 100644 --- a/ray-operator/controllers/ray/common/service.go +++ b/ray-operator/controllers/ray/common/service.go @@ -3,7 +3,9 @@ package common import ( "context" "fmt" + "os" "sort" + "strings" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -13,6 +15,10 @@ import ( "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) +func getEnableRayHeadClusterIPService() bool { + return strings.ToLower(os.Getenv(utils.ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE)) == "true" +} + // HeadServiceLabels returns the default labels for a cluster's head service. func HeadServiceLabels(cluster rayv1.RayCluster) map[string]string { return map[string]string{ @@ -33,40 +39,40 @@ func BuildServiceForHeadPod(ctx context.Context, cluster rayv1.RayCluster, label labels = make(map[string]string) } - default_labels := HeadServiceLabels(cluster) + defaultLabels := HeadServiceLabels(cluster) - // selector consists of *only* the keys in default_labels, updated with the values in labels if they exist + // selector consists of *only* the keys in defaultLabels, updated with the values in labels if they exist selector := make(map[string]string) - for k := range default_labels { + for k := range defaultLabels { if _, ok := labels[k]; ok { selector[k] = labels[k] } else { - selector[k] = default_labels[k] + selector[k] = defaultLabels[k] } } // Deep copy the selector to avoid modifying the original object - labels_for_service := make(map[string]string) + labelsForService := make(map[string]string) for k, v := range selector { - labels_for_service[k] = v + labelsForService[k] = v } if annotations == nil { annotations = make(map[string]string) } - default_name, err := utils.GenerateHeadServiceName(utils.RayClusterCRD, cluster.Spec, cluster.Name) + defaultName, err := utils.GenerateHeadServiceName(utils.RayClusterCRD, cluster.Spec, 
cluster.Name) if err != nil { return nil, err } - default_namespace := cluster.Namespace - default_type := cluster.Spec.HeadGroupSpec.ServiceType + defaultNamespace := cluster.Namespace + defaultType := cluster.Spec.HeadGroupSpec.ServiceType defaultAppProtocol := utils.DefaultServiceAppProtocol - // `ports_int` is a map of port names to port numbers, while `ports` is a list of ServicePort objects - ports_int := getServicePorts(cluster) + // `portsInt` is a map of port names to port numbers, while `ports` is a list of ServicePort objects + portsInt := getServicePorts(cluster) ports := []corev1.ServicePort{} - for name, port := range ports_int { + for name, port := range portsInt { svcPort := corev1.ServicePort{Name: name, Port: port, AppProtocol: &defaultAppProtocol} ports = append(ports, svcPort) } @@ -97,27 +103,32 @@ func BuildServiceForHeadPod(ctx context.Context, cluster rayv1.RayCluster, label // Append default ports. headService.Spec.Ports = append(headService.Spec.Ports, ports...) 
- setLabelsforUserProvidedService(headService, labels_for_service) - setNameforUserProvidedService(ctx, headService, default_name) - setNamespaceforUserProvidedService(ctx, headService, default_namespace) - setServiceTypeForUserProvidedService(ctx, headService, default_type) + setLabelsforUserProvidedService(headService, labelsForService) + setNameforUserProvidedService(ctx, headService, defaultName) + setNamespaceforUserProvidedService(ctx, headService, defaultNamespace) + setServiceTypeForUserProvidedService(ctx, headService, defaultType) return headService, nil } headService := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: default_name, - Namespace: default_namespace, - Labels: labels_for_service, + Name: defaultName, + Namespace: defaultNamespace, + Labels: labelsForService, Annotations: annotations, }, Spec: corev1.ServiceSpec{ Selector: selector, Ports: ports, - Type: default_type, + Type: defaultType, }, } + if !getEnableRayHeadClusterIPService() && (defaultType == "" || defaultType == corev1.ServiceTypeClusterIP) { + // Make the head service headless by default, because a RayCluster should have at most one head Pod. + headService.Spec.ClusterIP = corev1.ClusterIPNone + headService.Spec.PublishNotReadyAddresses = true // We don't need to hide the Head address if its health checks failed. 
+ } // This change ensures that reconciliation in rayservice_controller will not update the Service spec due to change in ports order // sorting the ServicePorts on their name @@ -191,17 +202,17 @@ func BuildServeService(ctx context.Context, rayService rayv1.RayService, rayClus selectorLabels[utils.RayClusterServingServiceLabelKey] = utils.EnableRayClusterServingServiceTrue } - default_name := utils.GenerateServeServiceName(name) - default_namespace := namespace - default_type := rayCluster.Spec.HeadGroupSpec.ServiceType + defaultName := utils.GenerateServeServiceName(name) + defaultNamespace := namespace + defaultType := rayCluster.Spec.HeadGroupSpec.ServiceType if isRayService { - default_type = rayService.Spec.RayClusterSpec.HeadGroupSpec.ServiceType + defaultType = rayService.Spec.RayClusterSpec.HeadGroupSpec.ServiceType } - // `ports_int` is a map of port names to port numbers, while `ports` is a list of ServicePort objects - ports_int := getServicePorts(rayCluster) - ports := []corev1.ServicePort{} - for name, port := range ports_int { + // `portsInt` is a map of port names to port numbers, while `ports` is a list of ServicePort objects + portsInt := getServicePorts(rayCluster) + ports := make([]corev1.ServicePort, 0, 1) + for name, port := range portsInt { if name == utils.ServingPortName { svcPort := corev1.ServicePort{Name: name, Port: port} ports = append(ports, svcPort) @@ -234,7 +245,7 @@ func BuildServeService(ctx context.Context, rayService rayv1.RayService, rayClus log.Info("port with name 'serve' already added. 
Ignoring user provided ports for serve service") serveService.Spec.Ports = ports } else { - ports := []corev1.ServicePort{} + ports := make([]corev1.ServicePort, 0, 1) for _, port := range serveService.Spec.Ports { if port.Name == utils.ServingPortName { svcPort := corev1.ServicePort{Name: port.Name, Port: port.Port} @@ -246,9 +257,9 @@ func BuildServeService(ctx context.Context, rayService rayv1.RayService, rayClus } setLabelsforUserProvidedService(serveService, labels) - setNameforUserProvidedService(ctx, serveService, default_name) - setNamespaceforUserProvidedService(ctx, serveService, default_namespace) - setServiceTypeForUserProvidedService(ctx, serveService, default_type) + setNameforUserProvidedService(ctx, serveService, defaultName) + setNamespaceforUserProvidedService(ctx, serveService, defaultNamespace) + setServiceTypeForUserProvidedService(ctx, serveService, defaultType) return serveService, nil } @@ -262,14 +273,14 @@ func BuildServeService(ctx context.Context, rayService rayv1.RayService, rayClus serveService := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: default_name, - Namespace: default_namespace, + Name: defaultName, + Namespace: defaultNamespace, Labels: labels, }, Spec: corev1.ServiceSpec{ Selector: selectorLabels, Ports: ports, - Type: default_type, + Type: defaultType, }, } @@ -277,7 +288,7 @@ func BuildServeService(ctx context.Context, rayService rayv1.RayService, rayClus } // BuildHeadlessService builds the headless service for workers in multi-host worker groups to communicate -func BuildHeadlessServiceForRayCluster(rayCluster rayv1.RayCluster) (*corev1.Service, error) { +func BuildHeadlessServiceForRayCluster(rayCluster rayv1.RayCluster) *corev1.Service { name := rayCluster.Name + utils.DashSymbol + utils.HeadlessServiceSuffix namespace := rayCluster.Namespace @@ -299,51 +310,54 @@ func BuildHeadlessServiceForRayCluster(rayCluster rayv1.RayCluster) (*corev1.Ser ClusterIP: "None", Selector: selectorLabels, Type: 
corev1.ServiceTypeClusterIP, + // The headless worker service is used for peer communication between multi-host workers and should not be + // dependent on Proxy Actor placement to publish DNS addresses. + PublishNotReadyAddresses: true, }, } - return headlessService, nil + return headlessService } -func setServiceTypeForUserProvidedService(ctx context.Context, service *corev1.Service, default_type corev1.ServiceType) { +func setServiceTypeForUserProvidedService(ctx context.Context, service *corev1.Service, defaultType corev1.ServiceType) { log := ctrl.LoggerFrom(ctx) // If the user has not specified a service type, use the default service type if service.Spec.Type == "" { log.Info("Using default serviceType passed for the user provided service", - "default_type passed", default_type, + "default_type passed", defaultType, "service.ObjectMeta.Name", service.ObjectMeta.Name) - service.Spec.Type = default_type + service.Spec.Type = defaultType } else { log.Info("Overriding default serviceType with user provided serviceType", - "default_type passed", default_type, + "default_type passed", defaultType, "service.ObjectMeta.Name", service.ObjectMeta.Name, - "default_type passed", default_type, + "default_type passed", defaultType, "service.Spec.Type", service.Spec.Type) } } -func setNamespaceforUserProvidedService(ctx context.Context, service *corev1.Service, default_namespace string) { +func setNamespaceforUserProvidedService(ctx context.Context, service *corev1.Service, defaultNamespace string) { log := ctrl.LoggerFrom(ctx) // If the user has specified a namespace, ignore it and raise a warning - if service.ObjectMeta.Namespace != "" && service.ObjectMeta.Namespace != default_namespace { + if service.ObjectMeta.Namespace != "" && service.ObjectMeta.Namespace != defaultNamespace { log.Info("Ignoring namespace in user provided service", "provided_namespace", service.ObjectMeta.Namespace, "service_name", service.ObjectMeta.Name, - "default_namespace", default_namespace) + 
"default_namespace", defaultNamespace) } - service.ObjectMeta.Namespace = default_namespace + service.ObjectMeta.Namespace = defaultNamespace } -func setNameforUserProvidedService(ctx context.Context, service *corev1.Service, default_name string) { +func setNameforUserProvidedService(ctx context.Context, service *corev1.Service, defaultName string) { log := ctrl.LoggerFrom(ctx) // If the user has not specified a name, use the default name passed if service.ObjectMeta.Name == "" { - log.Info("Using default name for user provided service.", "default_name", default_name) - service.ObjectMeta.Name = default_name + log.Info("Using default name for user provided service.", "default_name", defaultName) + service.ObjectMeta.Name = defaultName } else { log.Info("Overriding default name for user provided service with name in service.ObjectMeta.Name.", - "default_name", default_name, + "default_name", defaultName, "provided_name", service.ObjectMeta.Name) } } @@ -361,9 +375,9 @@ func setLabelsforUserProvidedService(service *corev1.Service, labels map[string] // getServicePorts will either user passing ports or default ports to create service. func getServicePorts(cluster rayv1.RayCluster) map[string]int32 { - ports, err := getPortsFromCluster(cluster) + ports := getPortsFromCluster(cluster) // Assign default ports - if err != nil || len(ports) == 0 { + if len(ports) == 0 { ports = getDefaultPorts() } @@ -378,7 +392,7 @@ func getServicePorts(cluster rayv1.RayCluster) map[string]int32 { // getPortsFromCluster get the ports from head container and directly map them in service // It's user's responsibility to maintain rayStartParam ports and container ports mapping // TODO: Consider to infer ports from rayStartParams (source of truth) in the future. 
-func getPortsFromCluster(cluster rayv1.RayCluster) (map[string]int32, error) { +func getPortsFromCluster(cluster rayv1.RayCluster) map[string]int32 { svcPorts := map[string]int32{} cPorts := cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Ports @@ -389,13 +403,13 @@ func getPortsFromCluster(cluster rayv1.RayCluster) (map[string]int32, error) { svcPorts[port.Name] = port.ContainerPort } - return svcPorts, nil + return svcPorts } func getDefaultPorts() map[string]int32 { return map[string]int32{ utils.ClientPortName: utils.DefaultClientPort, - utils.RedisPortName: utils.DefaultRedisPort, + utils.GcsServerPortName: utils.DefaultGcsServerPort, utils.DashboardPortName: utils.DefaultDashboardPort, utils.MetricsPortName: utils.DefaultMetricsPort, utils.ServingPortName: utils.DefaultServingPort, diff --git a/ray-operator/controllers/ray/common/service_test.go b/ray-operator/controllers/ray/common/service_test.go index 148826158b3..fcea1d2a49b 100644 --- a/ray-operator/controllers/ray/common/service_test.go +++ b/ray-operator/controllers/ray/common/service_test.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "os" "reflect" "testing" @@ -96,7 +97,7 @@ var ( }, }, } - instanceForServeSvc = &rayv1.RayCluster{ + instanceForSvc = &rayv1.RayCluster{ ObjectMeta: metav1.ObjectMeta{ Name: "raycluster-sample-svc", Namespace: "default", @@ -130,7 +131,7 @@ func TestBuildServiceForHeadPod(t *testing.T) { assert.Nil(t, err) actualResult := svc.Spec.Selector[utils.RayClusterLabelKey] - expectedResult := string(instanceWithWrongSvc.Name) + expectedResult := instanceWithWrongSvc.Name if !reflect.DeepEqual(expectedResult, actualResult) { t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult) } @@ -154,6 +155,21 @@ func TestBuildServiceForHeadPod(t *testing.T) { t.Fatalf("Expected `%v` but got `%v`", expectedResult, *port.AppProtocol) } } + // BuildServiceForHeadPod should generate a headless service for a Head Pod by default. 
+ if svc.Spec.ClusterIP != corev1.ClusterIPNone { + t.Fatalf("Expected `%v` but got `%v`", corev1.ClusterIPNone, svc.Spec.ClusterIP) + } +} + +func TestBuildClusterIPServiceForHeadPod(t *testing.T) { + os.Setenv(utils.ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE, "true") + defer os.Unsetenv(utils.ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE) + svc, err := BuildServiceForHeadPod(context.Background(), *instanceWithWrongSvc, nil, nil) + assert.Nil(t, err) + // BuildServiceForHeadPod should not generate a headless service for a Head Pod if ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE is set. + if svc.Spec.ClusterIP == corev1.ClusterIPNone { + t.Fatalf("Not expected `%v` but got `%v`", corev1.ClusterIPNone, svc.Spec.ClusterIP) + } } func TestBuildServiceForHeadPodWithAppNameLabel(t *testing.T) { @@ -191,8 +207,7 @@ func TestBuildServiceForHeadPodWithAnnotations(t *testing.T) { } func TestGetPortsFromCluster(t *testing.T) { - svcPorts, err := getPortsFromCluster(*instanceWithWrongSvc) - assert.Nil(t, err) + svcPorts := getPortsFromCluster(*instanceWithWrongSvc) // getPortsFromCluster creates service ports based on the container ports. // It will assign a generated service port name if the container port name @@ -270,6 +285,8 @@ func TestUserSpecifiedHeadService(t *testing.T) { userSelector := map[string]string{"userSelectorKey": "userSelectorValue", utils.RayClusterLabelKey: "userSelectorClusterName"} // Specify a "LoadBalancer" type, which differs from the default "ClusterIP" type. userType := corev1.ServiceTypeLoadBalancer + // Specify an empty ClusterIP, which differs from the default "None" used by BuildServiceForHeadPod.
+ userClusterIP := "" testRayClusterWithHeadService.Spec.HeadGroupSpec.HeadService = &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: userName, @@ -278,52 +295,58 @@ func TestUserSpecifiedHeadService(t *testing.T) { Annotations: userAnnotations, }, Spec: corev1.ServiceSpec{ - Ports: userPorts, - Selector: userSelector, - Type: userType, + Ports: userPorts, + Selector: userSelector, + Type: userType, + ClusterIP: userClusterIP, }, } // These labels originate from HeadGroupSpec.Template.ObjectMeta.Labels userTemplateClusterName := "userTemplateClusterName" - template_labels := map[string]string{utils.RayClusterLabelKey: userTemplateClusterName} - headService, err := BuildServiceForHeadPod(context.Background(), *testRayClusterWithHeadService, template_labels, testRayClusterWithHeadService.Spec.HeadServiceAnnotations) + templateLabels := map[string]string{utils.RayClusterLabelKey: userTemplateClusterName} + headService, err := BuildServiceForHeadPod(context.Background(), *testRayClusterWithHeadService, templateLabels, testRayClusterWithHeadService.Spec.HeadServiceAnnotations) if err != nil { t.Errorf("failed to build head service: %v", err) } + // BuildServiceForHeadPod should respect the ClusterIP specified by users. + if headService.Spec.ClusterIP != userClusterIP { + t.Fatalf("Expected `%v` but got `%v`", userClusterIP, headService.Spec.ClusterIP) + } + // The selector field should only use the keys from the five default labels. The values should be updated with the values from the template labels. // The user-provided HeadService labels should be ignored for the purposes of the selector field. The user-provided Selector field should be ignored. - default_labels := HeadServiceLabels(*testRayClusterWithHeadService) + defaultLabels := HeadServiceLabels(*testRayClusterWithHeadService) // Make sure this test isn't spuriously passing. Check that RayClusterLabelKey is in the default labels. 
- if _, ok := default_labels[utils.RayClusterLabelKey]; !ok { + if _, ok := defaultLabels[utils.RayClusterLabelKey]; !ok { t.Errorf("utils.RayClusterLabelKey=%s should be in the default labels", utils.RayClusterLabelKey) } for k, v := range headService.Spec.Selector { // If k is not in the default labels, then the selector field should not contain it. - if _, ok := default_labels[k]; !ok { + if _, ok := defaultLabels[k]; !ok { t.Errorf("Selector field should not contain key=%s", k) } // If k is in the template labels, then the selector field should contain it with the value from the template labels. // Otherwise, it should contain the value from the default labels. - if _, ok := template_labels[k]; ok { - if v != template_labels[k] { - t.Errorf("Selector field should contain key=%s with value=%s, actual value=%s", k, template_labels[k], v) + if _, ok := templateLabels[k]; ok { + if v != templateLabels[k] { + t.Errorf("Selector field should contain key=%s with value=%s, actual value=%s", k, templateLabels[k], v) } } else { - if v != default_labels[k] { - t.Errorf("Selector field should contain key=%s with value=%s, actual value=%s", k, default_labels[k], v) + if v != defaultLabels[k] { + t.Errorf("Selector field should contain key=%s with value=%s, actual value=%s", k, defaultLabels[k], v) } } } // The selector field should have every key from the default labels. 
- for k := range default_labels { + for k := range defaultLabels { if _, ok := headService.Spec.Selector[k]; !ok { t.Errorf("Selector field should contain key=%s", k) } } // Print default labels for debugging - for k, v := range default_labels { + for k, v := range defaultLabels { fmt.Printf("default label: key=%s, value=%s\n", k, v) } @@ -427,12 +450,55 @@ func TestBuildServiceForHeadPodPortsOrder(t *testing.T) { } } +func TestBuildHeadlessServiceForRayCluster(t *testing.T) { + svc := BuildHeadlessServiceForRayCluster(*instanceForSvc) + + actualSelector := svc.Spec.Selector[utils.RayClusterLabelKey] + expectedSelector := instanceForSvc.Name + if !reflect.DeepEqual(expectedSelector, actualSelector) { + t.Fatalf("Expected `%v` but got `%v`", expectedSelector, actualSelector) + } + + actualSelector = svc.Spec.Selector[utils.RayNodeTypeLabelKey] + expectedSelector = string(rayv1.WorkerNode) + if !reflect.DeepEqual(expectedSelector, actualSelector) { + t.Fatalf("Expected `%v` but got `%v`", expectedSelector, actualSelector) + } + + actualLabel := svc.Labels[utils.RayClusterHeadlessServiceLabelKey] + expectedLabel := instanceForSvc.Name + if !reflect.DeepEqual(expectedLabel, actualLabel) { + t.Fatalf("Expected `%v` but got `%v`", expectedLabel, actualLabel) + } + + actualType := svc.Spec.Type + expectedType := corev1.ServiceTypeClusterIP + if !reflect.DeepEqual(expectedType, actualType) { + t.Fatalf("Expected `%v` but got `%v`", expectedType, actualType) + } + + actualClusterIP := svc.Spec.ClusterIP + expectedClusterIP := corev1.ClusterIPNone + if !reflect.DeepEqual(expectedClusterIP, actualClusterIP) { + t.Fatalf("Expected `%v` but got `%v`", expectedClusterIP, actualClusterIP) + } + + actualPublishNotReadyAddresses := svc.Spec.PublishNotReadyAddresses + expectedPublishNotReadyAddresses := true + if !reflect.DeepEqual(expectedPublishNotReadyAddresses, actualPublishNotReadyAddresses) { + t.Fatalf("Expected `%v` but got `%v`", expectedPublishNotReadyAddresses, actualPublishNotReadyAddresses) + } + +
expectedName := fmt.Sprintf("%s-%s", instanceForSvc.Name, utils.HeadlessServiceSuffix) + validateNameAndNamespaceForUserSpecifiedService(svc, serviceInstance.ObjectMeta.Namespace, expectedName, t) +} + func TestBuildServeServiceForRayService(t *testing.T) { svc, err := BuildServeServiceForRayService(context.Background(), *serviceInstance, *instanceWithWrongSvc) assert.Nil(t, err) actualResult := svc.Spec.Selector[utils.RayClusterLabelKey] - expectedResult := string(instanceWithWrongSvc.Name) + expectedResult := instanceWithWrongSvc.Name if !reflect.DeepEqual(expectedResult, actualResult) { t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult) } @@ -460,17 +526,17 @@ func TestBuildServeServiceForRayService(t *testing.T) { } func TestBuildServeServiceForRayCluster(t *testing.T) { - svc, err := BuildServeServiceForRayCluster(context.Background(), *instanceForServeSvc) + svc, err := BuildServeServiceForRayCluster(context.Background(), *instanceForSvc) assert.Nil(t, err) actualResult := svc.Spec.Selector[utils.RayClusterLabelKey] - expectedResult := string(instanceForServeSvc.Name) + expectedResult := instanceForSvc.Name if !reflect.DeepEqual(expectedResult, actualResult) { t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult) } actualLabel := svc.Labels[utils.RayOriginatedFromCRNameLabelKey] - expectedLabel := instanceForServeSvc.Name + expectedLabel := instanceForSvc.Name assert.Equal(t, expectedLabel, actualLabel) actualLabel = svc.Labels[utils.RayOriginatedFromCRDLabelKey] @@ -478,12 +544,12 @@ func TestBuildServeServiceForRayCluster(t *testing.T) { assert.Equal(t, expectedLabel, actualLabel) actualType := svc.Spec.Type - expectedType := instanceForServeSvc.Spec.HeadGroupSpec.ServiceType + expectedType := instanceForSvc.Spec.HeadGroupSpec.ServiceType if !reflect.DeepEqual(expectedType, actualType) { t.Fatalf("Expected `%v` but got `%v`", expectedType, actualType) } - expectedName := fmt.Sprintf("%s-%s-%s", instanceForServeSvc.Name, 
"serve", "svc") + expectedName := fmt.Sprintf("%s-%s-%s", instanceForSvc.Name, "serve", "svc") validateNameAndNamespaceForUserSpecifiedService(svc, serviceInstance.ObjectMeta.Namespace, expectedName, t) } @@ -593,7 +659,7 @@ func validateServiceTypeForUserSpecifiedService(svc *corev1.Service, userType co } } -func validateNameAndNamespaceForUserSpecifiedService(svc *corev1.Service, default_namespace string, userName string, t *testing.T) { +func validateNameAndNamespaceForUserSpecifiedService(svc *corev1.Service, defaultNamespace string, userName string, t *testing.T) { // Test name and namespace are generated if not specified if svc.ObjectMeta.Name == "" { t.Errorf("Generated service name is empty") @@ -602,8 +668,8 @@ func validateNameAndNamespaceForUserSpecifiedService(svc *corev1.Service, defaul t.Errorf("Generated service namespace is empty") } // The user-provided namespace should be ignored, but the name should be respected - if svc.ObjectMeta.Namespace != default_namespace { - t.Errorf("User-provided namespace should be ignored: expected namespace=%s, actual namespace=%s", default_namespace, svc.ObjectMeta.Namespace) + if svc.ObjectMeta.Namespace != defaultNamespace { + t.Errorf("User-provided namespace should be ignored: expected namespace=%s, actual namespace=%s", defaultNamespace, svc.ObjectMeta.Namespace) } if svc.ObjectMeta.Name != userName { t.Errorf("User-provided name should be respected: expected name=%s, actual name=%s", userName, svc.ObjectMeta.Name) diff --git a/ray-operator/controllers/ray/common/test_utils.go b/ray-operator/controllers/ray/common/test_utils.go index 80253d4976c..22d082805d1 100644 --- a/ray-operator/controllers/ray/common/test_utils.go +++ b/ray-operator/controllers/ray/common/test_utils.go @@ -4,8 +4,9 @@ import ( "bytes" "testing" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "github.com/stretchr/testify/assert" + + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) // Generate a 
string of length 200. diff --git a/ray-operator/controllers/ray/expectations/scale_expectations.go b/ray-operator/controllers/ray/expectations/scale_expectations.go new file mode 100644 index 00000000000..a684128447c --- /dev/null +++ b/ray-operator/controllers/ray/expectations/scale_expectations.go @@ -0,0 +1,178 @@ +package expectations + +import ( + "context" + "fmt" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const HeadGroup = "" + +// ScaleAction is the action of scale, like create and delete. +type ScaleAction string + +const ( + // Create action + Create ScaleAction = "create" + // Delete action + Delete ScaleAction = "delete" +) + +const ( + // GroupIndex indexes pods within the specified group in RayCluster + GroupIndex = "group" + // RayClusterIndex indexes pods within the RayCluster + RayClusterIndex = "raycluster" +) + +var ExpectationsTimeout = time.Second * 30 + +// RayClusterScaleExpectation is an interface used to set and wait on scale expectations for RayCluster groups. +type RayClusterScaleExpectation interface { + ExpectScalePod(namespace, rayClusterName, group, podName string, action ScaleAction) + IsSatisfied(ctx context.Context, namespace, rayClusterName, group string) bool + Delete(rayClusterName, namespace string) +} + +func NewRayClusterScaleExpectation(client client.Client) RayClusterScaleExpectation { + return &rayClusterScaleExpectationImpl{ + Client: client, + itemsCache: cache.NewIndexer(rayPodKey, cache.Indexers{GroupIndex: groupIndexFunc, RayClusterIndex: rayClusterIndexFunc}), + } +} + +type rayClusterScaleExpectationImpl struct { + client.Client + // itemsCache is only used to cache rayPod.
+ itemsCache cache.Indexer +} + +func (r *rayClusterScaleExpectationImpl) ExpectScalePod(namespace, rayClusterName, group, name string, action ScaleAction) { + // Strictly limit the data type stored in itemsCache to rayPod. + // If an error occurs, it must be due to an issue with our usage. We should panic immediately instead of returning an error. + if err := r.itemsCache.Add(&rayPod{ + name: name, + namespace: namespace, + group: group, + rayCluster: rayClusterName, + action: action, + recordTimestamp: time.Now(), + }); err != nil { + // If an error occurs, it indicates that there is an issue with our KeyFunc. + // This is a fatal error, panic it. + panic(err) + } +} + +func (r *rayClusterScaleExpectationImpl) IsSatisfied(ctx context.Context, namespace, rayClusterName, group string) (isSatisfied bool) { + items, err := r.itemsCache.ByIndex(GroupIndex, fmt.Sprintf("%s/%s/%s", namespace, rayClusterName, group)) + if err != nil { + // An error occurs when there is no corresponding IndexFunc for GroupIndex. This should be a fatal error. + panic(err) + } + isSatisfied = true + for i := range items { + rp := items[i].(*rayPod) + pod := &corev1.Pod{} + isPodSatisfied := false + switch rp.action { + case Create: + if err := r.Get(ctx, types.NamespacedName{Name: rp.name, Namespace: namespace}, pod); err == nil { + isPodSatisfied = true + } else { + // Tolerating extreme case: + // The first reconciliation created a Pod. If the Pod was quickly deleted from etcd by another component + // before the second reconciliation. This would lead to never satisfying the expected condition. + // Avoid this by setting a timeout. 
+ isPodSatisfied = rp.recordTimestamp.Add(ExpectationsTimeout).Before(time.Now()) + } + case Delete: + if err := r.Get(ctx, types.NamespacedName{Name: rp.name, Namespace: namespace}, pod); err != nil { + isPodSatisfied = errors.IsNotFound(err) + } else { + isPodSatisfied = pod.DeletionTimestamp != nil + } + } + // delete satisfied item in cache + if isPodSatisfied { + if err := r.itemsCache.Delete(items[i]); err != nil { + // Fatal error in KeyFunc. + panic(err) + } + } else { + isSatisfied = false + } + } + return isSatisfied +} + +func (r *rayClusterScaleExpectationImpl) Delete(rayClusterName, namespace string) { + items, err := r.itemsCache.ByIndex(RayClusterIndex, fmt.Sprintf("%s/%s", namespace, rayClusterName)) + if err != nil { + // An error occurs when there is no corresponding IndexFunc for RayClusterIndex. This should be a fatal error. + panic(err) + } + for _, item := range items { + if err := r.itemsCache.Delete(item); err != nil { + // Fatal error in KeyFunc. + panic(err) + } + } +} + +type rayPod struct { + recordTimestamp time.Time + action ScaleAction + name string + namespace string + rayCluster string + group string +} + +func (p *rayPod) Key() string { + return fmt.Sprintf("%s/%s", p.namespace, p.name) +} + +func (p *rayPod) GroupKey() string { + return fmt.Sprintf("%s/%s/%s", p.namespace, p.rayCluster, p.group) +} + +func (p *rayPod) ClusterKey() string { + return fmt.Sprintf("%s/%s", p.namespace, p.rayCluster) +} + +// rayPodKey is used only for getting rayPod.Key(). The type of obj must be rayPod. +func rayPodKey(obj interface{}) (string, error) { + return obj.(*rayPod).Key(), nil +} + +// groupIndexFunc is used only for getting rayPod.GroupKey(). The type of obj must be rayPod. +func groupIndexFunc(obj interface{}) ([]string, error) { + return []string{obj.(*rayPod).GroupKey()}, nil +} + +// rayClusterIndexFunc is used only for getting rayPod.ClusterKey(). The type of obj must be rayPod. 
+func rayClusterIndexFunc(obj interface{}) ([]string, error) { + return []string{obj.(*rayPod).ClusterKey()}, nil +} + +func NewFakeRayClusterScaleExpectation() RayClusterScaleExpectation { + return &fakeRayClusterScaleExpectation{} +} + +type fakeRayClusterScaleExpectation struct{} + +func (r *fakeRayClusterScaleExpectation) ExpectScalePod(_, _, _, _ string, _ ScaleAction) { +} + +func (r *fakeRayClusterScaleExpectation) IsSatisfied(_ context.Context, _, _, _ string) bool { + return true +} + +func (r *fakeRayClusterScaleExpectation) Delete(_, _ string) {} diff --git a/ray-operator/controllers/ray/expectations/scale_expectations_test.go b/ray-operator/controllers/ray/expectations/scale_expectations_test.go new file mode 100644 index 00000000000..d152f09f9bc --- /dev/null +++ b/ray-operator/controllers/ray/expectations/scale_expectations_test.go @@ -0,0 +1,167 @@ +package expectations + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestRayClusterExpectationsHeadPod(t *testing.T) { + ctx := context.Background() + // Simulate local Informer with fakeClient. + fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects().Build() + exp := NewRayClusterScaleExpectation(fakeClient) + namespace := "default" + rayClusterName := "raycluster-test" + testPods := getTestPod() + + // Expect create head pod. + exp.ExpectScalePod(namespace, rayClusterName, HeadGroup, testPods[0].Name, Create) + // There is no head pod in Informer, return false. + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), false) + // Add a pod to the informer. This is used to simulate the informer syncing with the head pod in etcd. + // In reality, it should be automatically done by the informer. 
+ err := fakeClient.Create(ctx, &testPods[0]) + assert.NoError(t, err, "Fail to create head pod") + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), true) + // Expect delete head pod. + exp.ExpectScalePod(namespace, rayClusterName, HeadGroup, testPods[0].Name, Delete) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), false) + // Delete head pod from the informer. + err = fakeClient.Delete(ctx, &testPods[0]) + assert.NoError(t, err, "Fail to delete head pod") + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), true) +} + +func TestRayClusterExpectationsForSamePod(t *testing.T) { + ctx := context.Background() + // Simulate local Informer with fakeClient. + fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects().Build() + exp := NewRayClusterScaleExpectation(fakeClient) + namespace := "default" + rayClusterName := "raycluster-test" + testPods := getTestPod() + + // Expect the same Pod to be created and deleted. + exp.ExpectScalePod(namespace, rayClusterName, HeadGroup, testPods[0].Name, Create) + // Delete, override the expectation for the same Pod + exp.ExpectScalePod(namespace, rayClusterName, HeadGroup, testPods[0].Name, Delete) + // There is no pod in the informer. Satisfied. And delete expectation. + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), true) + err := fakeClient.Create(ctx, &testPods[0]) + assert.NoError(t, err, "Fail to create head pod") + // No expectation + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), true) + err = fakeClient.Delete(ctx, &testPods[0]) + assert.NoError(t, err, "Fail to delete head pod") + // No expectation + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), true) +} + +func TestRayClusterExpectationsWorkerGroupPods(t *testing.T) { + ctx := context.Background() + // Simulate local Informer with fakeClient. 
+ fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects().Build() + exp := NewRayClusterScaleExpectation(fakeClient) + namespace := "default" + rayClusterName := "raycluster-test" + groupA := "test-group-a" + groupB := "test-group-b" + testPods := getTestPod() + // Expect create one worker pod in group-a, two worker pods in group-b. + exp.ExpectScalePod(namespace, rayClusterName, groupA, testPods[0].Name, Create) + exp.ExpectScalePod(namespace, rayClusterName, groupB, testPods[1].Name, Create) + exp.ExpectScalePod(namespace, rayClusterName, groupB, testPods[2].Name, Create) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupA), false) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupB), false) + assert.NoError(t, fakeClient.Create(ctx, &testPods[1]), "Fail to create worker pod2") + // All pods within the same group are expected to meet. + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupB), false) + assert.NoError(t, fakeClient.Create(ctx, &testPods[2]), "Fail to create worker pod3") + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupB), true) + // Different groups do not affect each other. + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupA), false) + assert.NoError(t, fakeClient.Create(ctx, &testPods[0]), "Fail to create worker pod1") + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupA), true) + + // Expect delete. 
+ exp.ExpectScalePod(namespace, rayClusterName, groupA, testPods[0].Name, Delete) + exp.ExpectScalePod(namespace, rayClusterName, groupB, testPods[1].Name, Delete) + exp.ExpectScalePod(namespace, rayClusterName, groupB, testPods[2].Name, Delete) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupA), false) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupB), false) + assert.NoError(t, fakeClient.Delete(ctx, &testPods[1]), "Fail to delete worker pod2") + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupB), false) + assert.NoError(t, fakeClient.Delete(ctx, &testPods[2]), "Fail to delete worker pod3") + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupB), true) + // Different groups do not affect each other. + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupA), false) + assert.NoError(t, fakeClient.Delete(ctx, &testPods[0]), "Fail to delete worker pod1") + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, groupA), true) +} + +func TestRayClusterExpectationsDeleteAll(t *testing.T) { + ctx := context.Background() + // Simulate local Informer with fakeClient. 
+ fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects().Build() + exp := NewRayClusterScaleExpectation(fakeClient) + namespace := "default" + rayClusterName := "raycluster-test" + group := "test-group" + testPods := getTestPod() + exp.ExpectScalePod(namespace, rayClusterName, HeadGroup, testPods[0].Name, Create) + exp.ExpectScalePod(namespace, rayClusterName, group, testPods[1].Name, Create) + exp.ExpectScalePod(namespace, rayClusterName, group, testPods[2].Name, Delete) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), false) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, group), false) + // Delete all expectations + exp.Delete(rayClusterName, namespace) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), true) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, group), true) +} + +func TestRayClusterExpectationsTimeout(t *testing.T) { + ctx := context.Background() + // Reduce the timeout duration so that tests don't have to wait for a long time. + ExpectationsTimeout = 1 * time.Second + // Simulate local Informer with fakeClient. + fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects().Build() + exp := NewRayClusterScaleExpectation(fakeClient) + namespace := "default" + rayClusterName := "raycluster-test" + testPods := getTestPod() + + exp.ExpectScalePod(namespace, rayClusterName, HeadGroup, testPods[0].Name, Create) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), false) + // Expectations should be released after timeout. 
+ time.Sleep(ExpectationsTimeout + 1*time.Second) + assert.Equal(t, exp.IsSatisfied(ctx, namespace, rayClusterName, HeadGroup), true) +} + +func getTestPod() []corev1.Pod { + return []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: "default", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod2", + Namespace: "default", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod3", + Namespace: "default", + }, + }, + } +} diff --git a/ray-operator/controllers/ray/raycluster_controller.go b/ray-operator/controllers/ray/raycluster_controller.go index 63b584c11d7..86a839ddc40 100644 --- a/ray-operator/controllers/ray/raycluster_controller.go +++ b/ray-operator/controllers/ray/raycluster_controller.go @@ -2,29 +2,35 @@ package ray import ( "context" + errstd "errors" "fmt" "os" "reflect" + "runtime" "strconv" "strings" "time" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" + configapi "github.com/ray-project/kuberay/ray-operator/apis/config/v1alpha1" "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler" "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/expectations" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + "github.com/ray-project/kuberay/ray-operator/pkg/features" batchv1 "k8s.io/api/batch/v1" rbacv1 "k8s.io/api/rbac/v1" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "k8s.io/client-go/tools/record" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/go-logr/logr" routev1 "github.com/openshift/api/route/v1" - _ "k8s.io/api/apps/v1beta1" "k8s.io/client-go/discovery" "k8s.io/client-go/rest" @@ -32,22 +38,21 @@ import ( networkingv1 "k8s.io/api/networking/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/apimachinery/pkg/runtime" - 
"k8s.io/apimachinery/pkg/types" + k8sruntime "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" - controller "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) +type reconcileFunc func(context.Context, *rayv1.RayCluster) error + var ( DefaultRequeueDuration = 2 * time.Second - ForcedClusterUpgrade bool - EnableBatchScheduler bool // Definition of a index field for pod name podUIDIndexField = "metadata.uid" @@ -76,27 +81,24 @@ func getClusterType(ctx context.Context) bool { if err != nil { logger.Info("Error while querying ServerGroups, assuming we're on Vanilla Kubernetes") return false - } else { - for i := 0; i < len(apiGroupList.Groups); i++ { - if strings.HasSuffix(apiGroupList.Groups[i].Name, ".openshift.io") { - logger.Info("We detected being on OpenShift!") - return true - } + } + for i := 0; i < len(apiGroupList.Groups); i++ { + if strings.HasSuffix(apiGroupList.Groups[i].Name, ".openshift.io") { + logger.Info("We detected being on OpenShift!") + return true } - return false } - } else { - logger.Info("Cannot retrieve a DiscoveryClient, assuming we're on Vanilla Kubernetes") return false } - } else { - logger.Info("Cannot retrieve config, assuming we're on Vanilla Kubernetes") + logger.Info("Cannot retrieve a DiscoveryClient, assuming we're on Vanilla Kubernetes") return false } + logger.Info("Cannot retrieve config, assuming we're on Vanilla Kubernetes") + return false } // NewReconciler returns a new reconcile.Reconciler -func NewReconciler(ctx context.Context, mgr manager.Manager, options RayClusterReconcilerOptions) *RayClusterReconciler { +func NewReconciler(ctx context.Context, mgr 
manager.Manager, options RayClusterReconcilerOptions, rayConfigs configapi.Configuration) *RayClusterReconciler { if err := mgr.GetFieldIndexer().IndexField(ctx, &corev1.Pod{}, podUIDIndexField, func(rawObj client.Object) []string { pod := rawObj.(*corev1.Pod) return []string{string(pod.UID)} @@ -104,31 +106,41 @@ func NewReconciler(ctx context.Context, mgr manager.Manager, options RayClusterR panic(err) } isOpenShift := getClusterType(ctx) + // init the batch scheduler manager + schedulerMgr, err := batchscheduler.NewSchedulerManager(rayConfigs, mgr.GetConfig()) + if err != nil { + // fail fast if the scheduler plugin fails to init + // prevent running the controller in an undefined state + panic(err) + } + // add schema to runtime + schedulerMgr.AddToScheme(mgr.GetScheme()) return &RayClusterReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Recorder: mgr.GetEventRecorderFor("raycluster-controller"), - BatchSchedulerMgr: batchscheduler.NewSchedulerManager(mgr.GetConfig()), + BatchSchedulerMgr: schedulerMgr, IsOpenShift: isOpenShift, - headSidecarContainers: options.HeadSidecarContainers, - workerSidecarContainers: options.WorkerSidecarContainers, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(mgr.GetClient()), + headSidecarContainers: options.HeadSidecarContainers, + workerSidecarContainers: options.WorkerSidecarContainers, } } -var _ reconcile.Reconciler = &RayClusterReconciler{} - // RayClusterReconciler reconciles a RayCluster object type RayClusterReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder - BatchSchedulerMgr *batchscheduler.SchedulerManager - IsOpenShift bool + Scheme *k8sruntime.Scheme + Recorder record.EventRecorder + BatchSchedulerMgr *batchscheduler.SchedulerManager + rayClusterScaleExpectation expectations.RayClusterScaleExpectation headSidecarContainers []corev1.Container workerSidecarContainers []corev1.Container + + IsOpenShift bool } type RayClusterReconcilerOptions 
struct { @@ -166,11 +178,13 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, request ctrl.Reque // Try to fetch the RayCluster instance instance := &rayv1.RayCluster{} if err = r.Get(ctx, request.NamespacedName, instance); err == nil { - return r.rayClusterReconcile(ctx, request, instance) + return r.rayClusterReconcile(ctx, instance) } // No match found if errors.IsNotFound(err) { + // Clear all related expectations. Use the request key here: instance is still empty because the Get above failed. + r.rayClusterScaleExpectation.Delete(request.Name, request.Namespace) logger.Info("Read request instance not found error!") } else { logger.Error(err, "Read request instance error!") @@ -197,9 +211,96 @@ func (r *RayClusterReconciler) deleteAllPods(ctx context.Context, filters common return pods, nil } -func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request ctrl.Request, instance *rayv1.RayCluster) (ctrl.Result, error) { +// Validation for invalid Ray Cluster configurations. +func validateRayClusterSpec(instance *rayv1.RayCluster) error { + if len(instance.Spec.HeadGroupSpec.Template.Spec.Containers) == 0 { + return fmt.Errorf("headGroupSpec should have at least one container") + } + + for _, workerGroup := range instance.Spec.WorkerGroupSpecs { + if len(workerGroup.Template.Spec.Containers) == 0 { + return fmt.Errorf("workerGroupSpec should have at least one container") + } + } + + if instance.Annotations[utils.RayFTEnabledAnnotationKey] != "" && instance.Spec.GcsFaultToleranceOptions != nil { + return fmt.Errorf("%s annotation and GcsFaultToleranceOptions are both set. "+ + "Please use only GcsFaultToleranceOptions to configure GCS fault tolerance", utils.RayFTEnabledAnnotationKey) + } + + if !utils.IsGCSFaultToleranceEnabled(*instance) { + if utils.EnvVarExists(utils.RAY_REDIS_ADDRESS, instance.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env) { + return fmt.Errorf("%s is set which implicitly enables GCS fault tolerance, "+ + "but GcsFaultToleranceOptions is not set.
Please set GcsFaultToleranceOptions "+ + "to enable GCS fault tolerance", utils.RAY_REDIS_ADDRESS) + } + } + + if instance.Spec.GcsFaultToleranceOptions != nil { + if redisPassword := instance.Spec.HeadGroupSpec.RayStartParams["redis-password"]; redisPassword != "" { + return fmt.Errorf("cannot set `redis-password` in rayStartParams when " + + "GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisPassword instead") + } + + headContainer := instance.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex] + if utils.EnvVarExists(utils.REDIS_PASSWORD, headContainer.Env) { + return fmt.Errorf("cannot set `REDIS_PASSWORD` env var in head Pod when " + + "GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisPassword instead") + } + + if utils.EnvVarExists(utils.RAY_REDIS_ADDRESS, headContainer.Env) { + return fmt.Errorf("cannot set `RAY_REDIS_ADDRESS` env var in head Pod when " + + "GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisAddress instead") + } + + if instance.Annotations[utils.RayExternalStorageNSAnnotationKey] != "" { + return fmt.Errorf("cannot set `ray.io/external-storage-namespace` annotation when " + + "GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.ExternalStorageNamespace instead") + } + } + + if !features.Enabled(features.RayJobDeletionPolicy) { + for _, workerGroup := range instance.Spec.WorkerGroupSpecs { + if workerGroup.Suspend != nil && *workerGroup.Suspend { + return fmt.Errorf("suspending worker groups is currently available when the RayJobDeletionPolicy feature gate is enabled") + } + } + } + + if utils.IsAutoscalingEnabled(instance) { + for _, workerGroup := range instance.Spec.WorkerGroupSpecs { + if workerGroup.Suspend != nil && *workerGroup.Suspend { + // TODO (rueian): This can be supported in future Ray. We should check the RayVersion once we know the version. 
+ return fmt.Errorf("suspending worker groups is not currently supported with Autoscaler enabled") + } + } + } + return nil +} + +func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, instance *rayv1.RayCluster) (ctrl.Result, error) { + var reconcileErr error logger := ctrl.LoggerFrom(ctx) + if manager := utils.ManagedByExternalController(instance.Spec.ManagedBy); manager != nil { + logger.Info("Skipping RayCluster managed by a custom controller", "managed-by", manager) + return ctrl.Result{}, nil + } + + if err := validateRayClusterSpec(instance); err != nil { + logger.Error(err, fmt.Sprintf("The RayCluster spec is invalid %s/%s", instance.Namespace, instance.Name)) + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.InvalidRayClusterSpec), + "The RayCluster spec is invalid %s/%s: %v", instance.Namespace, instance.Name, err) + return ctrl.Result{}, nil + } + + if err := utils.ValidateRayClusterStatus(instance); err != nil { + logger.Error(err, "The RayCluster status is invalid") + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.InvalidRayClusterStatus), + "The RayCluster status is invalid %s/%s, %v", instance.Namespace, instance.Name, err) + return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err + } + // Please do NOT modify `originalRayClusterInstance` in the following code. originalRayClusterInstance := instance.DeepCopy() @@ -209,7 +310,7 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request // manually after the RayCluster CR deletion. 
enableGCSFTRedisCleanup := strings.ToLower(os.Getenv(utils.ENABLE_GCS_FT_REDIS_CLEANUP)) != "false" - if enableGCSFTRedisCleanup && common.IsGCSFaultToleranceEnabled(*instance) { + if enableGCSFTRedisCleanup && utils.IsGCSFaultToleranceEnabled(*instance) { if instance.DeletionTimestamp.IsZero() { if !controllerutil.ContainsFinalizer(instance, utils.GCSFaultToleranceRedisCleanupFinalizer) { logger.Info( @@ -217,7 +318,7 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request "finalizer", utils.GCSFaultToleranceRedisCleanupFinalizer) controllerutil.AddFinalizer(instance, utils.GCSFaultToleranceRedisCleanupFinalizer) if err := r.Update(ctx, instance); err != nil { - logger.Error(err, fmt.Sprintf("Failed to add the finalizer %s to the RayCluster.", utils.GCSFaultToleranceRedisCleanupFinalizer)) + err = fmt.Errorf("failed to add the finalizer %s to the RayCluster: %w", utils.GCSFaultToleranceRedisCleanupFinalizer, err) return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err } // Only start the RayCluster reconciliation after the finalizer is added. @@ -225,9 +326,10 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request } } else { logger.Info( - fmt.Sprintf("The RayCluster with GCS enabled, %s, is being deleted. Start to handle the Redis cleanup finalizer %s.", - instance.Name, utils.GCSFaultToleranceRedisCleanupFinalizer), - "DeletionTimestamp", instance.ObjectMeta.DeletionTimestamp) + "The RayCluster with GCS enabled is being deleted. Start to handle the Redis cleanup finalizer.", + "redisCleanupFinalizer", utils.GCSFaultToleranceRedisCleanupFinalizer, + "deletionTimestamp", instance.ObjectMeta.DeletionTimestamp, + ) // Delete the head Pod if it exists. 
headPods, err := r.deleteAllPods(ctx, common.RayClusterHeadPodsAssociationOptions(instance)) @@ -239,18 +341,18 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err } if len(headPods.Items) > 0 { - logger.Info(fmt.Sprintf( - "Wait for the head Pod %s to be terminated before initiating the Redis cleanup process. "+ - "The storage namespace %s in Redis cannot be fully deleted if the GCS process on the head Pod is still writing to it.", - headPods.Items[0].Name, headPods.Items[0].Annotations[utils.RayExternalStorageNSAnnotationKey])) + logger.Info( + "Wait for the head Pod to be terminated before initiating the Redis cleanup process. "+"The storage namespace in Redis cannot be fully deleted if the GCS process on the head Pod is still writing to it.", + "headPodName", headPods.Items[0].Name, + "redisStorageNamespace", headPods.Items[0].Annotations[utils.RayExternalStorageNSAnnotationKey], + ) // Requeue after 10 seconds because it takes much longer than DefaultRequeueDuration (2 seconds) for the head Pod to be terminated. return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } - // We can start the Redis cleanup process now because the head Pod has been terminated. 
- filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name, utils.RayNodeTypeLabelKey: string(rayv1.RedisCleanupNode)} + filterLabels := common.RayClusterRedisCleanupJobAssociationOptions(instance).ToListOptions() redisCleanupJobs := batchv1.JobList{} - if err := r.List(ctx, &redisCleanupJobs, client.InNamespace(instance.Namespace), filterLabels); err != nil { + if err := r.List(ctx, &redisCleanupJobs, filterLabels...); err != nil { return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err } @@ -266,113 +368,91 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request } switch condition { case batchv1.JobComplete: - logger.Info(fmt.Sprintf( - "The Redis cleanup Job %s has been completed. "+ - "The storage namespace %s in Redis has been fully deleted.", - redisCleanupJob.Name, redisCleanupJob.Annotations[utils.RayExternalStorageNSAnnotationKey])) + logger.Info( + "The Redis cleanup Job has been completed. "+ + "The storage namespace in Redis has been fully deleted.", + "redisCleanupJobName", redisCleanupJob.Name, + "redisStorageNamespace", redisCleanupJob.Annotations[utils.RayExternalStorageNSAnnotationKey], + ) case batchv1.JobFailed: - logger.Info(fmt.Sprintf( - "The Redis cleanup Job %s has failed, requeue the RayCluster CR after 5 minute. "+ - "You should manually delete the storage namespace %s in Redis and remove the RayCluster's finalizer. "+ + logger.Info( + "The Redis cleanup Job has failed, requeue the RayCluster CR after 5 minute. "+ + "You should manually delete the storage namespace in Redis and remove the RayCluster's finalizer. 
"+ "Please check https://docs.ray.io/en/master/cluster/kubernetes/user-guides/kuberay-gcs-ft.html for more details.", - redisCleanupJob.Name, redisCleanupJob.Annotations[utils.RayExternalStorageNSAnnotationKey])) + "redisCleanupJobName", redisCleanupJob.Name, + "redisStorageNamespace", redisCleanupJob.Annotations[utils.RayExternalStorageNSAnnotationKey], + ) } return ctrl.Result{}, nil - } else { // the redisCleanupJob is still running - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, nil } - } else { - redisCleanupJob := r.buildRedisCleanupJob(ctx, *instance) - if err := r.Create(ctx, &redisCleanupJob); err != nil { - if errors.IsAlreadyExists(err) { - logger.Info(fmt.Sprintf("Redis cleanup Job already exists. Requeue the RayCluster CR %s.", instance.Name)) - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, nil - } - logger.Error(err, "Failed to create Redis cleanup Job") - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err + // the redisCleanupJob is still running + return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, nil + } + redisCleanupJob := r.buildRedisCleanupJob(ctx, *instance) + if err := r.Create(ctx, &redisCleanupJob); err != nil { + if errors.IsAlreadyExists(err) { + logger.Info("Redis cleanup Job already exists. 
Requeue the RayCluster CR.") + return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, nil } - logger.Info("Successfully created Redis cleanup Job", "Job name", redisCleanupJob.Name) + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToCreateRedisCleanupJob), + "Failed to create Redis cleanup Job %s/%s, %v", redisCleanupJob.Namespace, redisCleanupJob.Name, err) + return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err } + logger.Info("Created Redis cleanup Job", "name", redisCleanupJob.Name) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.CreatedRedisCleanupJob), + "Created Redis cleanup Job %s/%s", redisCleanupJob.Namespace, redisCleanupJob.Name) return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, nil } } if instance.DeletionTimestamp != nil && !instance.DeletionTimestamp.IsZero() { - logger.Info("RayCluster is being deleted, just ignore", "cluster name", request.Name) + logger.Info("RayCluster is being deleted, just ignore") return ctrl.Result{}, nil } - if err := r.reconcileAutoscalerServiceAccount(ctx, instance); err != nil { - if updateErr := r.updateClusterState(ctx, instance, rayv1.Failed); updateErr != nil { - logger.Error(updateErr, "RayCluster update state error", "cluster name", request.Name) - } - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err + reconcileFuncs := []reconcileFunc{ + r.reconcileAutoscalerServiceAccount, + r.reconcileAutoscalerRole, + r.reconcileAutoscalerRoleBinding, + r.reconcileIngress, + r.reconcileHeadService, + r.reconcileHeadlessService, + r.reconcileServeService, + r.reconcilePods, } - if err := r.reconcileAutoscalerRole(ctx, instance); err != nil { - if updateErr := r.updateClusterState(ctx, instance, rayv1.Failed); updateErr != nil { - logger.Error(updateErr, "RayCluster update state error", "cluster name", request.Name) - } - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err - } - if err := r.reconcileAutoscalerRoleBinding(ctx, instance); err != 
nil { - if updateErr := r.updateClusterState(ctx, instance, rayv1.Failed); updateErr != nil { - logger.Error(updateErr, "RayCluster update state error", "cluster name", request.Name) - } - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err - } - if err := r.reconcileIngress(ctx, instance); err != nil { - if updateErr := r.updateClusterState(ctx, instance, rayv1.Failed); updateErr != nil { - logger.Error(updateErr, "RayCluster update state error", "cluster name", request.Name) - } - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err - } - if err := r.reconcileHeadService(ctx, instance); err != nil { - if updateErr := r.updateClusterState(ctx, instance, rayv1.Failed); updateErr != nil { - logger.Error(updateErr, "RayCluster update state error", "cluster name", request.Name) - } - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err - } - if err := r.reconcileHeadlessService(ctx, instance); err != nil { - if updateErr := r.updateClusterState(ctx, instance, rayv1.Failed); updateErr != nil { - logger.Error(updateErr, "RayCluster update state error", "cluster name", request.Name) - } - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err - } - // Only reconcile the K8s service for Ray Serve when the "ray.io/enable-serve-service" annotation is set to true. 
- if enableServeServiceValue, exist := instance.Annotations[utils.EnableServeServiceKey]; exist && enableServeServiceValue == utils.EnableServeServiceTrue { - if err := r.reconcileServeService(ctx, instance); err != nil { - if updateErr := r.updateClusterState(ctx, instance, rayv1.Failed); updateErr != nil { - logger.Error(updateErr, "RayCluster update state error", "cluster name", request.Name) - } - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err - } - } - if err := r.reconcilePods(ctx, instance); err != nil { - if updateErr := r.updateClusterState(ctx, instance, rayv1.Failed); updateErr != nil { - logger.Error(updateErr, "RayCluster update state error", "cluster name", request.Name) - } - if updateErr := r.updateClusterReason(ctx, instance, err.Error()); updateErr != nil { - logger.Error(updateErr, "RayCluster update reason error", "cluster name", request.Name) + for _, fn := range reconcileFuncs { + if reconcileErr = fn(ctx, instance); reconcileErr != nil { + funcName := runtime.FuncForPC(reflect.ValueOf(fn).Pointer()).Name() + logger.Error(reconcileErr, "Error reconcile resources", "function name", funcName) + break } - r.Recorder.Event(instance, corev1.EventTypeWarning, string(rayv1.PodReconciliationError), err.Error()) - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err } // Calculate the new status for the RayCluster. Note that the function will deep copy `instance` instead of mutating it. 
- newInstance, err := r.calculateStatus(ctx, instance) - if err != nil { - logger.Info("Got error when calculating new status", "cluster name", request.Name, "error", err) - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err + newInstance, calculateErr := r.calculateStatus(ctx, instance, reconcileErr) + var updateErr error + var inconsistent bool + if calculateErr != nil { + logger.Info("Got error when calculating new status", "error", calculateErr) + } else { + inconsistent, updateErr = r.updateRayClusterStatus(ctx, originalRayClusterInstance, newInstance) } - // Check if need to update the status. - if r.inconsistentRayClusterStatus(ctx, originalRayClusterInstance.Status, newInstance.Status) { - logger.Info("rayClusterReconcile", "Update CR status", request.Name, "status", newInstance.Status) - if err := r.Status().Update(ctx, newInstance); err != nil { - logger.Info("Got error when updating status", "cluster name", request.Name, "error", err, "RayCluster", newInstance) - return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err - } + // Return error based on order. + var err error + if reconcileErr != nil { + err = reconcileErr + } else if calculateErr != nil { + err = calculateErr + } else { + err = updateErr + } + // If the custom resource's status is updated, requeue the reconcile key. + // Without this behavior, atomic operations such as the suspend operation would need to wait for `RAYCLUSTER_DEFAULT_REQUEUE_SECONDS` to delete Pods + // after the condition rayv1.RayClusterSuspending is set to true. + if err != nil || inconsistent { + return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err } // Unconditionally requeue after the number of seconds specified in the @@ -380,10 +460,14 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request // environment variable is not set, requeue after the default value. 
requeueAfterSeconds, err := strconv.Atoi(os.Getenv(utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV)) if err != nil { - logger.Info(fmt.Sprintf("Environment variable %s is not set, using default value of %d seconds", utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV, utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS), "cluster name", request.Name) + logger.Info( + "Environment variable is not set, using default value of seconds", + "environmentVariable", utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV, + "defaultValue", utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS, + ) requeueAfterSeconds = utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS } - logger.Info("Unconditional requeue after", "cluster name", request.Name, "seconds", requeueAfterSeconds) + logger.Info("Unconditional requeue after", "seconds", requeueAfterSeconds) return ctrl.Result{RequeueAfter: time.Duration(requeueAfterSeconds) * time.Second}, nil } @@ -395,25 +479,49 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request // this field should be used to determine whether to update this CR or not. 
func (r *RayClusterReconciler) inconsistentRayClusterStatus(ctx context.Context, oldStatus rayv1.RayClusterStatus, newStatus rayv1.RayClusterStatus) bool { logger := ctrl.LoggerFrom(ctx) - if oldStatus.State != newStatus.State || oldStatus.Reason != newStatus.Reason { - logger.Info("inconsistentRayClusterStatus", "detect inconsistency", fmt.Sprintf( - "old State: %s, new State: %s, old Reason: %s, new Reason: %s", - oldStatus.State, newStatus.State, oldStatus.Reason, newStatus.Reason)) + + if oldStatus.State != newStatus.State || oldStatus.Reason != newStatus.Reason { //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + logger.Info( + "inconsistentRayClusterStatus", + "oldState", oldStatus.State, //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + "newState", newStatus.State, //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + "oldReason", oldStatus.Reason, + "newReason", newStatus.Reason, + ) return true } - if oldStatus.AvailableWorkerReplicas != newStatus.AvailableWorkerReplicas || oldStatus.DesiredWorkerReplicas != newStatus.DesiredWorkerReplicas || - oldStatus.MinWorkerReplicas != newStatus.MinWorkerReplicas || oldStatus.MaxWorkerReplicas != newStatus.MaxWorkerReplicas { - logger.Info("inconsistentRayClusterStatus", "detect inconsistency", fmt.Sprintf( - "old AvailableWorkerReplicas: %d, new AvailableWorkerReplicas: %d, old DesiredWorkerReplicas: %d, new DesiredWorkerReplicas: %d, "+ - "old MinWorkerReplicas: %d, new MinWorkerReplicas: %d, old MaxWorkerReplicas: %d, new MaxWorkerReplicas: %d", - oldStatus.AvailableWorkerReplicas, newStatus.AvailableWorkerReplicas, oldStatus.DesiredWorkerReplicas, newStatus.DesiredWorkerReplicas, - oldStatus.MinWorkerReplicas, newStatus.MinWorkerReplicas, oldStatus.MaxWorkerReplicas, newStatus.MaxWorkerReplicas)) + if oldStatus.ReadyWorkerReplicas != newStatus.ReadyWorkerReplicas || + oldStatus.AvailableWorkerReplicas != newStatus.AvailableWorkerReplicas || + 
oldStatus.DesiredWorkerReplicas != newStatus.DesiredWorkerReplicas || + oldStatus.MinWorkerReplicas != newStatus.MinWorkerReplicas || + oldStatus.MaxWorkerReplicas != newStatus.MaxWorkerReplicas { + logger.Info( + "inconsistentRayClusterStatus", + "oldReadyWorkerReplicas", oldStatus.ReadyWorkerReplicas, + "newReadyWorkerReplicas", newStatus.ReadyWorkerReplicas, + "oldAvailableWorkerReplicas", oldStatus.AvailableWorkerReplicas, + "newAvailableWorkerReplicas", newStatus.AvailableWorkerReplicas, + "oldDesiredWorkerReplicas", oldStatus.DesiredWorkerReplicas, + "newDesiredWorkerReplicas", newStatus.DesiredWorkerReplicas, + "oldMinWorkerReplicas", oldStatus.MinWorkerReplicas, + "newMinWorkerReplicas", newStatus.MinWorkerReplicas, + "oldMaxWorkerReplicas", oldStatus.MaxWorkerReplicas, + "newMaxWorkerReplicas", newStatus.MaxWorkerReplicas, + ) return true } if !reflect.DeepEqual(oldStatus.Endpoints, newStatus.Endpoints) || !reflect.DeepEqual(oldStatus.Head, newStatus.Head) { - logger.Info("inconsistentRayClusterStatus", "detect inconsistency", fmt.Sprintf( - "old Endpoints: %v, new Endpoints: %v, old Head: %v, new Head: %v", - oldStatus.Endpoints, newStatus.Endpoints, oldStatus.Head, newStatus.Head)) + logger.Info( + "inconsistentRayClusterStatus", + "oldEndpoints", oldStatus.Endpoints, + "newEndpoints", newStatus.Endpoints, + "oldHead", oldStatus.Head, + "newHead", newStatus.Head, + ) + return true + } + if !reflect.DeepEqual(oldStatus.Conditions, newStatus.Conditions) { + logger.Info("inconsistentRayClusterStatus", "old conditions", oldStatus.Conditions, "new conditions", newStatus.Conditions) return true } return false @@ -429,30 +537,27 @@ func (r *RayClusterReconciler) reconcileIngress(ctx context.Context, instance *r if r.IsOpenShift { // This is open shift - create route return r.reconcileRouteOpenShift(ctx, instance) - } else { - // plain vanilla kubernetes - create ingress - return r.reconcileIngressKubernetes(ctx, instance) } + // plain vanilla kubernetes - 
create ingress + return r.reconcileIngressKubernetes(ctx, instance) } func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, instance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) headRoutes := routev1.RouteList{} - filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name} - if err := r.List(ctx, &headRoutes, client.InNamespace(instance.Namespace), filterLabels); err != nil { - logger.Error(err, "Route Listing error!", "Route.Error", err) + filterLabels := common.RayClusterNetworkResourcesOptions(instance).ToListOptions() + if err := r.List(ctx, &headRoutes, filterLabels...); err != nil { return err } - if headRoutes.Items != nil && len(headRoutes.Items) == 1 { + if len(headRoutes.Items) == 1 { logger.Info("reconcileIngresses", "head service route found", headRoutes.Items[0].Name) return nil } - if headRoutes.Items == nil || len(headRoutes.Items) == 0 { + if len(headRoutes.Items) == 0 { route, err := common.BuildRouteForHeadService(*instance) if err != nil { - logger.Error(err, "Failed building route!", "Route.Error", err) return err } @@ -460,9 +565,7 @@ func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, inst return err } - err = r.createHeadRoute(ctx, route, instance) - if err != nil { - logger.Error(err, "Failed creating route!", "Route.Error", err) + if err := r.createHeadRoute(ctx, route, instance); err != nil { return err } } @@ -473,17 +576,17 @@ func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, inst func (r *RayClusterReconciler) reconcileIngressKubernetes(ctx context.Context, instance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) headIngresses := networkingv1.IngressList{} - filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name} - if err := r.List(ctx, &headIngresses, client.InNamespace(instance.Namespace), filterLabels); err != nil { + filterLabels := common.RayClusterNetworkResourcesOptions(instance).ToListOptions() 
+ if err := r.List(ctx, &headIngresses, filterLabels...); err != nil { return err } - if headIngresses.Items != nil && len(headIngresses.Items) == 1 { + if len(headIngresses.Items) == 1 { logger.Info("reconcileIngresses", "head service ingress found", headIngresses.Items[0].Name) return nil } - if headIngresses.Items == nil || len(headIngresses.Items) == 0 { + if len(headIngresses.Items) == 0 { ingress, err := common.BuildIngressForHeadService(ctx, *instance) if err != nil { return err @@ -493,8 +596,7 @@ func (r *RayClusterReconciler) reconcileIngressKubernetes(ctx context.Context, i return err } - err = r.createHeadIngress(ctx, ingress, instance) - if err != nil { + if err := r.createHeadIngress(ctx, ingress, instance); err != nil { return err } } @@ -506,9 +608,9 @@ func (r *RayClusterReconciler) reconcileIngressKubernetes(ctx context.Context, i func (r *RayClusterReconciler) reconcileHeadService(ctx context.Context, instance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) services := corev1.ServiceList{} - filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name, utils.RayNodeTypeLabelKey: string(rayv1.HeadNode)} + filterLabels := common.RayClusterHeadServiceListOptions(instance) - if err := r.List(ctx, &services, client.InNamespace(instance.Namespace), filterLabels); err != nil { + if err := r.List(ctx, &services, filterLabels...); err != nil { return err } @@ -538,8 +640,8 @@ func (r *RayClusterReconciler) reconcileHeadService(ctx context.Context, instanc headSvc, err := common.BuildServiceForHeadPod(ctx, *instance, labels, annotations) // TODO (kevin85421): Provide a detailed and actionable error message. For example, which port is missing? if len(headSvc.Spec.Ports) == 0 { - logger.Info("Ray head service does not have any ports set up. Service specification: %v", headSvc.Spec) - return fmt.Errorf("Ray head service does not have any ports set up. 
Service specification: %v", headSvc.Spec) + logger.Info("Ray head service does not have any ports set up.", "serviceSpecification", headSvc.Spec) + return fmt.Errorf("ray head service does not have any ports set up. Service specification: %v", headSvc.Spec) } if err != nil { @@ -556,6 +658,11 @@ func (r *RayClusterReconciler) reconcileHeadService(ctx context.Context, instanc // Return nil only when the serve service successfully created or already exists. func (r *RayClusterReconciler) reconcileServeService(ctx context.Context, instance *rayv1.RayCluster) error { + // Only reconcile the K8s service for Ray Serve when the "ray.io/enable-serve-service" annotation is set to true. + if enableServeServiceValue, exist := instance.Annotations[utils.EnableServeServiceKey]; !exist || enableServeServiceValue != utils.EnableServeServiceTrue { + return nil + } + // Retrieve the Service from the Kubernetes cluster with the name and namespace. svc := &corev1.Service{} err := r.Get(ctx, common.RayClusterServeServiceNamespacedName(instance), svc) @@ -573,13 +680,9 @@ func (r *RayClusterReconciler) reconcileServeService(ctx context.Context, instan return err } // create service - if err := r.Create(ctx, svc); err != nil { - return err - } - return nil - } else { - return err + return r.Create(ctx, svc) } + return err } // Return nil only when the headless service for multi-host worker groups is successfully created or already exists. @@ -604,16 +707,12 @@ func (r *RayClusterReconciler) reconcileHeadlessService(ctx context.Context, ins if len(services.Items) != 0 { // service exists, do nothing return nil - } else { - // Create headless tpu worker service if there's no existing one in the cluster. - headlessSvc, err := common.BuildHeadlessServiceForRayCluster(*instance) - if err != nil { - return err - } + } + // Create headless tpu worker service if there's no existing one in the cluster. 
+ headlessSvc := common.BuildHeadlessServiceForRayCluster(*instance) - if err := r.createService(ctx, headlessSvc, instance); err != nil { - return err - } + if err := r.createService(ctx, headlessSvc, instance); err != nil { + return err } } @@ -623,25 +722,43 @@ func (r *RayClusterReconciler) reconcileHeadlessService(ctx context.Context, ins func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) - // if RayCluster is suspended, delete all pods and skip reconcile - if instance.Spec.Suspend != nil && *instance.Spec.Suspend { + // if RayCluster is suspending, delete all pods and skip reconcile + suspendStatus := utils.FindRayClusterSuspendStatus(instance) + statusConditionGateEnabled := features.Enabled(features.RayClusterStatusConditions) + if suspendStatus == rayv1.RayClusterSuspending || + (!statusConditionGateEnabled && instance.Spec.Suspend != nil && *instance.Spec.Suspend) { if _, err := r.deleteAllPods(ctx, common.RayClusterAllPodsAssociationOptions(instance)); err != nil { - return err + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeletePodCollection), + "Failed deleting Pods due to suspension for RayCluster %s/%s, %v", + instance.Namespace, instance.Name, err) + return errstd.Join(utils.ErrFailedDeleteAllPods, err) } - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Deleted", + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.DeletedPod), "Deleted Pods for RayCluster %s/%s due to suspension", instance.Namespace, instance.Name) return nil } + if statusConditionGateEnabled { + if suspendStatus == rayv1.RayClusterSuspended { + return nil // stop reconcilePods because the cluster is suspended. + } + // (suspendStatus != rayv1.RayClusterSuspending) is always true here because it has been checked above. 
+ if instance.Spec.Suspend != nil && *instance.Spec.Suspend { + return nil // stop reconcilePods because the cluster is going to suspend. + } + } + // check if all the pods exist headPods := corev1.PodList{} if err := r.List(ctx, &headPods, common.RayClusterHeadPodsAssociationOptions(instance).ToListOptions()...); err != nil { return err } - if EnableBatchScheduler { - if scheduler, err := r.BatchSchedulerMgr.GetSchedulerForCluster(instance); err == nil { + // check if the batch scheduler integration is enabled + // call the scheduler plugin if so + if r.BatchSchedulerMgr != nil { + if scheduler, err := r.BatchSchedulerMgr.GetSchedulerForCluster(); err == nil { if err := scheduler.DoBatchSchedulingOnSubmission(ctx, instance); err != nil { return err } @@ -649,11 +766,13 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv return err } } - // Reconcile head Pod - if len(headPods.Items) == 1 { + if !r.rayClusterScaleExpectation.IsSatisfied(ctx, instance.Namespace, instance.Name, expectations.HeadGroup) { + logger.Info("reconcilePods", "Expectation", "NotSatisfiedHeadExpectations, reconcile head later") + } else if len(headPods.Items) == 1 { headPod := headPods.Items[0] logger.Info("reconcilePods", "Found 1 head Pod", headPod.Name, "Pod status", headPod.Status.Phase, + "Pod status reason", headPod.Status.Reason, "Pod restart policy", headPod.Spec.RestartPolicy, "Ray container terminated status", getRayContainerStateTerminated(headPod)) @@ -661,24 +780,28 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv logger.Info("reconcilePods", "head Pod", headPod.Name, "shouldDelete", shouldDelete, "reason", reason) if shouldDelete { if err := r.Delete(ctx, &headPod); err != nil { - return err + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeleteHeadPod), + "Failed deleting head Pod %s/%s; Pod status: %s; Pod restart policy: %s; Ray container terminated status: %v, %v", + 
headPod.Namespace, headPod.Name, headPod.Status.Phase, headPod.Spec.RestartPolicy, getRayContainerStateTerminated(headPod), err) + return errstd.Join(utils.ErrFailedDeleteHeadPod, err) } - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Deleted", - "Deleted head Pod %s; Pod status: %s; Pod restart policy: %s; Ray container terminated status: %v", - headPod.Name, headPod.Status.Phase, headPod.Spec.RestartPolicy, getRayContainerStateTerminated(headPod)) - return fmt.Errorf(reason) + r.rayClusterScaleExpectation.ExpectScalePod(headPod.Namespace, instance.Name, expectations.HeadGroup, headPod.Name, expectations.Delete) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.DeletedHeadPod), + "Deleted head Pod %s/%s; Pod status: %s; Pod restart policy: %s; Ray container terminated status: %v", + headPod.Namespace, headPod.Name, headPod.Status.Phase, headPod.Spec.RestartPolicy, getRayContainerStateTerminated(headPod)) + return errstd.New(reason) } } else if len(headPods.Items) == 0 { // Create head Pod if it does not exist. - logger.Info("reconcilePods", "Found 0 head Pods; creating a head Pod for the RayCluster.", instance.Name) + logger.Info("reconcilePods: Found 0 head Pods; creating a head Pod for the RayCluster.") common.CreatedClustersCounterInc(instance.Namespace) if err := r.createHeadPod(ctx, *instance); err != nil { common.FailedClustersCounterInc(instance.Namespace) - return err + return errstd.Join(utils.ErrFailedCreateHeadPod, err) } common.SuccessfulClustersCounterInc(instance.Namespace) } else if len(headPods.Items) > 1 { - logger.Info("reconcilePods", fmt.Sprintf("Found %d head Pods; deleting extra head Pods.", len(headPods.Items)), instance.Name) + logger.Info("reconcilePods: Found more than one head Pods; deleting extra head Pods.", "nHeadPods", len(headPods.Items)) // TODO (kevin85421): In-place update may not be a good idea. 
itemLength := len(headPods.Items) for index := 0; index < itemLength; index++ { @@ -692,49 +815,18 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv // delete all the extra head pod pods for _, extraHeadPodToDelete := range headPods.Items { if err := r.Delete(ctx, &extraHeadPodToDelete); err != nil { - return err - } - } - } - - if ForcedClusterUpgrade { - if len(headPods.Items) == 1 { - // head node amount is exactly 1, but we need to check if it has been changed - res := utils.PodNotMatchingTemplate(headPods.Items[0], instance.Spec.HeadGroupSpec.Template) - if res { - logger.Info(fmt.Sprintf("need to delete old head pod %s", headPods.Items[0].Name)) - if err := r.Delete(ctx, &headPods.Items[0]); err != nil { - return err - } - return nil - } - } - - // check if WorkerGroupSpecs has been changed and we need to kill worker pods - for _, worker := range instance.Spec.WorkerGroupSpecs { - workerPods := corev1.PodList{} - if err := r.List(ctx, &workerPods, common.RayClusterGroupPodsAssociationOptions(instance, worker.GroupName).ToListOptions()...); err != nil { - return err - } - updatedWorkerPods := false - for _, item := range workerPods.Items { - if utils.PodNotMatchingTemplate(item, worker.Template) { - logger.Info(fmt.Sprintf("need to delete old worker pod %s", item.Name)) - if err := r.Delete(ctx, &item); err != nil { - logger.Info(fmt.Sprintf("error deleting worker pod %s", item.Name)) - return err - } - updatedWorkerPods = true - } - } - if updatedWorkerPods { - return nil + return errstd.Join(utils.ErrFailedDeleteHeadPod, err) } + r.rayClusterScaleExpectation.ExpectScalePod(extraHeadPodToDelete.Namespace, instance.Name, expectations.HeadGroup, extraHeadPodToDelete.Name, expectations.Delete) } } // Reconcile worker pods now for _, worker := range instance.Spec.WorkerGroupSpecs { + if !r.rayClusterScaleExpectation.IsSatisfied(ctx, instance.Namespace, instance.Name, worker.GroupName) { + logger.Info("reconcilePods", "worker 
group", worker.GroupName, "Expectation", "NotSatisfiedGroupExpectations, reconcile the group later") + continue + } // workerReplicas will store the target number of pods for this worker group. var workerReplicas int32 = utils.GetWorkerGroupDesiredReplicas(ctx, worker) logger.Info("reconcilePods", "desired workerReplicas (always adhering to minReplicas/maxReplica)", workerReplicas, "worker group", worker.GroupName, "maxReplicas", worker.MaxReplicas, "minReplicas", worker.MinReplicas, "replicas", worker.Replicas) @@ -744,6 +836,18 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv return err } + // Delete all workers if worker group is suspended and skip reconcile + if worker.Suspend != nil && *worker.Suspend { + if _, err := r.deleteAllPods(ctx, common.RayClusterGroupPodsAssociationOptions(instance, worker.GroupName)); err != nil { + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeleteWorkerPodCollection), + "Failed deleting worker Pods for suspended group %s in RayCluster %s/%s, %v", worker.GroupName, instance.Namespace, instance.Name, err) + return errstd.Join(utils.ErrFailedDeleteWorkerPod, err) + } + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.DeletedWorkerPod), + "Deleted all pods for suspended worker group %s in RayCluster %s/%s", worker.GroupName, instance.Namespace, instance.Name) + continue + } + // Delete unhealthy worker Pods. 
deletedWorkers := make(map[string]struct{}) deleted := struct{}{} @@ -755,17 +859,21 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv numDeletedUnhealthyWorkerPods++ deletedWorkers[workerPod.Name] = deleted if err := r.Delete(ctx, &workerPod); err != nil { - return err + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeleteWorkerPod), + "Failed deleting worker Pod %s/%s; Pod status: %s; Pod restart policy: %s; Ray container terminated status: %v, %v", + workerPod.Namespace, workerPod.Name, workerPod.Status.Phase, workerPod.Spec.RestartPolicy, getRayContainerStateTerminated(workerPod), err) + return errstd.Join(utils.ErrFailedDeleteWorkerPod, err) } - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Deleted", - "Deleted worker Pod %s; Pod status: %s; Pod restart policy: %s; Ray container terminated status: %v", - workerPod.Name, workerPod.Status.Phase, workerPod.Spec.RestartPolicy, getRayContainerStateTerminated(workerPod)) + r.rayClusterScaleExpectation.ExpectScalePod(workerPod.Namespace, instance.Name, worker.GroupName, workerPod.Name, expectations.Delete) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.DeletedWorkerPod), + "Deleted worker Pod %s/%s; Pod status: %s; Pod restart policy: %s; Ray container terminated status: %v", + workerPod.Namespace, workerPod.Name, workerPod.Status.Phase, workerPod.Spec.RestartPolicy, getRayContainerStateTerminated(workerPod)) } } // If we delete unhealthy Pods, we will not create new Pods in this reconciliation. if numDeletedUnhealthyWorkerPods > 0 { - return fmt.Errorf("Delete %d unhealthy worker Pods.", numDeletedUnhealthyWorkerPods) + return fmt.Errorf("delete %d unhealthy worker Pods", numDeletedUnhealthyWorkerPods) } // Always remove the specified WorkersToDelete - regardless of the value of Replicas. 
@@ -779,12 +887,14 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv if err := r.Delete(ctx, &pod); err != nil { if !errors.IsNotFound(err) { logger.Info("reconcilePods", "Fail to delete Pod", pod.Name, "error", err) - return err + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeleteWorkerPod), "Failed deleting pod %s/%s, %v", pod.Namespace, pod.Name, err) + return errstd.Join(utils.ErrFailedDeleteWorkerPod, err) } logger.Info("reconcilePods", "The worker Pod has already been deleted", pod.Name) } else { + r.rayClusterScaleExpectation.ExpectScalePod(pod.Namespace, instance.Name, worker.GroupName, pod.Name, expectations.Delete) deletedWorkers[pod.Name] = deleted - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Deleted", "Deleted pod %s", pod.Name) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.DeletedWorkerPod), "Deleted pod %s/%s", pod.Namespace, pod.Name) } } worker.ScaleStrategy.WorkersToDelete = []string{} @@ -796,20 +906,24 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv } } // A replica can contain multiple hosts, so we need to calculate this based on the number of hosts per replica. - numExpectedPods := workerReplicas * worker.NumOfHosts - diff := numExpectedPods - int32(len(runningPods.Items)) + // If the user doesn't install the CRD with `NumOfHosts`, the zero value of `NumOfHosts`, which is 0, will be used. + // Hence, all workers will be deleted. Here, we set `NumOfHosts` to max(1, `NumOfHosts`) to avoid this situation. 
+ if worker.NumOfHosts <= 0 { + worker.NumOfHosts = 1 + } + numExpectedPods := int(workerReplicas * worker.NumOfHosts) + diff := numExpectedPods - len(runningPods.Items) - logger.Info("reconcilePods", "workerReplicas", workerReplicas, "runningPods", len(runningPods.Items), "diff", diff) + logger.Info("reconcilePods", "workerReplicas", workerReplicas, "NumOfHosts", worker.NumOfHosts, "runningPods", len(runningPods.Items), "diff", diff) if diff > 0 { // pods need to be added logger.Info("reconcilePods", "Number workers to add", diff, "Worker group", worker.GroupName) // create all workers of this group - var i int32 - for i = 0; i < diff; i++ { - logger.Info("reconcilePods", "creating worker for group", worker.GroupName, fmt.Sprintf("index %d", i), fmt.Sprintf("in total %d", diff)) + for i := 0; i < diff; i++ { + logger.Info("reconcilePods", "creating worker for group", worker.GroupName, "index", i, "total", diff) if err := r.createWorkerPod(ctx, *instance, *worker.DeepCopy()); err != nil { - return err + return errstd.Join(utils.ErrFailedCreateWorkerPod, err) } } } else if diff == 0 { @@ -819,7 +933,7 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv // diff < 0 indicates the need to delete some Pods to match the desired number of replicas. However, // randomly deleting Pods is certainly not ideal. So, if autoscaling is enabled for the cluster, we // will disable random Pod deletion, making Autoscaler the sole decision-maker for Pod deletions. - enableInTreeAutoscaling := (instance.Spec.EnableInTreeAutoscaling != nil) && (*instance.Spec.EnableInTreeAutoscaling) + enableInTreeAutoscaling := utils.IsAutoscalingEnabled(instance) // TODO (kevin85421): `enableRandomPodDelete` is a feature flag for KubeRay v0.6.0. If users want to use // the old behavior, they can set the environment variable `ENABLE_RANDOM_POD_DELETE` to `true`. 
When the @@ -837,19 +951,21 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv // diff < 0 means that we need to delete some Pods to meet the desired number of replicas. randomlyRemovedWorkers := -diff logger.Info("reconcilePods", "Number workers to delete randomly", randomlyRemovedWorkers, "Worker group", worker.GroupName) - for i := 0; i < int(randomlyRemovedWorkers); i++ { + for i := 0; i < randomlyRemovedWorkers; i++ { randomPodToDelete := runningPods.Items[i] logger.Info("Randomly deleting Pod", "progress", fmt.Sprintf("%d / %d", i+1, randomlyRemovedWorkers), "with name", randomPodToDelete.Name) if err := r.Delete(ctx, &randomPodToDelete); err != nil { if !errors.IsNotFound(err) { - return err + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeleteWorkerPod), "Failed deleting Pod %s/%s, %v", randomPodToDelete.Namespace, randomPodToDelete.Name, err) + return errstd.Join(utils.ErrFailedDeleteWorkerPod, err) } logger.Info("reconcilePods", "The worker Pod has already been deleted", randomPodToDelete.Name) } - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Deleted", "Deleted Pod %s", randomPodToDelete.Name) + r.rayClusterScaleExpectation.ExpectScalePod(randomPodToDelete.Namespace, instance.Name, worker.GroupName, randomPodToDelete.Name, expectations.Delete) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.DeletedWorkerPod), "Deleted Pod %s/%s", randomPodToDelete.Namespace, randomPodToDelete.Name) } } else { - logger.Info(fmt.Sprintf("Random Pod deletion is disabled for cluster %s. The only decision-maker for Pod deletions is Autoscaler.", instance.Name)) + logger.Info("Random Pod deletion is disabled for the cluster. The only decision-maker for Pod deletions is Autoscaler.") } } } @@ -865,43 +981,33 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv // (1) shouldDelete: Whether the Pod should be deleted. 
// (2) reason: The reason why the Pod should or should not be deleted. func shouldDeletePod(pod corev1.Pod, nodeType rayv1.RayNodeType) (bool, string) { - // If a Pod's restart policy is set to `Always`, KubeRay will not delete - // the Pod and rely on the Pod's restart policy to restart the Pod. - isRestartPolicyAlways := pod.Spec.RestartPolicy == corev1.RestartPolicyAlways + // Based on the logic of the change of the status of the K8S pod, the following judgment is made. + // https://github.com/kubernetes/kubernetes/blob/3361895612dac57044d5dacc029d2ace1865479c/pkg/kubelet/kubelet_pods.go#L1556 // If the Pod's status is `Failed` or `Succeeded`, the Pod will not restart and we can safely delete it. if pod.Status.Phase == corev1.PodFailed || pod.Status.Phase == corev1.PodSucceeded { - if isRestartPolicyAlways { - // Based on my observation, a Pod with `RestartPolicy: Always` will never be in the terminated states (i.e., `Failed` or `Succeeded`). - // However, I couldn't find any well-defined behavior in the Kubernetes documentation, so I can't guarantee that the status transition - // from `Running` to `Failed / Succeeded` and back to `Running` won't occur when we kill the main process (i.e., `ray start` in KubeRay) - // in the head Pod. Therefore, I've added this check as a safeguard. - reason := fmt.Sprintf( - "The status of the %s Pod %s is %s. However, KubeRay will not delete the Pod because its restartPolicy is set to 'Always' "+ - "and it should be able to restart automatically.", nodeType, pod.Name, pod.Status.Phase) - return false, reason - } - reason := fmt.Sprintf( - "The %s Pod %s status is %s which is a terminal state and it will not restart. "+ - "KubeRay will delete the Pod and create new Pods in the next reconciliation if necessary.", nodeType, pod.Name, pod.Status.Phase) + "The %s Pod %s status is %s which is a terminal state. 
"+ + "KubeRay will delete the Pod and create new Pods in the next reconciliation if necessary.", + nodeType, pod.Name, pod.Status.Phase) return true, reason } rayContainerTerminated := getRayContainerStateTerminated(pod) if pod.Status.Phase == corev1.PodRunning && rayContainerTerminated != nil { - if isRestartPolicyAlways { - // If restart policy is set to `Always`, KubeRay will not delete the Pod. + if pod.Spec.RestartPolicy == corev1.RestartPolicyNever { reason := fmt.Sprintf( - "The Pod status of the %s Pod %s is %s, and the Ray container terminated status is %v. However, KubeRay will not delete the Pod because its restartPolicy is set to 'Always' "+ - "and it should be able to restart automatically.", nodeType, pod.Name, pod.Status.Phase, rayContainerTerminated) - return false, reason + "The Pod status of the %s Pod %s is %s, and the Ray container terminated status is %v. "+ + "The container is unable to restart due to its restart policy %s, so KubeRay will delete it.", + nodeType, pod.Name, pod.Status.Phase, rayContainerTerminated, pod.Spec.RestartPolicy) + return true, reason } + // If restart policy is set to `Always` or `OnFailure`, KubeRay will not delete the Pod. reason := fmt.Sprintf( "The Pod status of the %s Pod %s is %s, and the Ray container terminated status is %v. 
"+ - "The container is unable to restart due to its restart policy %s, so KubeRay will delete it.", + "However, KubeRay will not delete the Pod because its restartPolicy is set to %s and it should be able to restart automatically.", nodeType, pod.Name, pod.Status.Phase, rayContainerTerminated, pod.Spec.RestartPolicy) - return true, reason + return false, reason } // TODO (kevin85421): Consider deleting a Pod if its Ray container restarts excessively, as this might @@ -957,11 +1063,11 @@ func (r *RayClusterReconciler) createHeadIngress(ctx context.Context, ingress *n logger.Info("Ingress already exists, no need to create") return nil } - logger.Error(err, "Ingress create error!", "Ingress.Error", err) + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToCreateIngress), "Failed creating ingress %s/%s, %v", ingress.Namespace, ingress.Name, err) return err } - logger.Info("Ingress created successfully", "ingress name", ingress.Name) - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Created", "Created ingress %s", ingress.Name) + logger.Info("Created ingress for RayCluster", "name", ingress.Name) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.CreatedIngress), "Created ingress %s/%s", ingress.Namespace, ingress.Name) return nil } @@ -976,34 +1082,29 @@ func (r *RayClusterReconciler) createHeadRoute(ctx context.Context, route *route logger.Info("Route already exists, no need to create") return nil } - logger.Error(err, "Route create error!", "Route.Error", err) + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToCreateRoute), "Failed creating route %s/%s, %v", route.Namespace, route.Name, err) return err } - logger.Info("Route created successfully", "route name", route.Name) - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Created", "Created route %s", route.Name) + logger.Info("Created route for RayCluster", "name", route.Name) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, 
string(utils.CreatedRoute), "Created route %s/%s", route.Namespace, route.Name) return nil } -func (r *RayClusterReconciler) createService(ctx context.Context, raySvc *corev1.Service, instance *rayv1.RayCluster) error { +func (r *RayClusterReconciler) createService(ctx context.Context, svc *corev1.Service, instance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) // making sure the name is valid - raySvc.Name = utils.CheckName(raySvc.Name) - // Set controller reference - if err := controllerutil.SetControllerReference(instance, raySvc, r.Scheme); err != nil { + svc.Name = utils.CheckName(svc.Name) + if err := controllerutil.SetControllerReference(instance, svc, r.Scheme); err != nil { return err } - if err := r.Create(ctx, raySvc); err != nil { - if errors.IsAlreadyExists(err) { - logger.Info("Pod service already exist, no need to create") - return nil - } - logger.Error(err, "Pod Service create error!", "Pod.Service.Error", err) + if err := r.Create(ctx, svc); err != nil { + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToCreateService), "Failed creating service %s/%s, %v", svc.Namespace, svc.Name, err) return err } - logger.Info("Pod Service created successfully", "service name", raySvc.Name) - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Created", "Created service %s", raySvc.Name) + logger.Info("Created service for RayCluster", "name", svc.Name) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.CreatedService), "Created service %s/%s", svc.Namespace, svc.Name) return nil } @@ -1012,35 +1113,23 @@ func (r *RayClusterReconciler) createHeadPod(ctx context.Context, instance rayv1 // build the pod then create it pod := r.buildHeadPod(ctx, instance) - podIdentifier := types.NamespacedName{ - Name: pod.Name, - Namespace: pod.Namespace, - } - if EnableBatchScheduler { - if scheduler, err := r.BatchSchedulerMgr.GetSchedulerForCluster(&instance); err == nil { - scheduler.AddMetadataToPod(&instance, 
utils.RayNodeHeadGroupLabelValue, &pod) + // check if the batch scheduler integration is enabled + // call the scheduler plugin if so + if r.BatchSchedulerMgr != nil { + if scheduler, err := r.BatchSchedulerMgr.GetSchedulerForCluster(); err == nil { + scheduler.AddMetadataToPod(ctx, &instance, utils.RayNodeHeadGroupLabelValue, &pod) } else { return err } } - logger.Info("createHeadPod", "head pod with name", pod.GenerateName) if err := r.Create(ctx, &pod); err != nil { - if errors.IsAlreadyExists(err) { - fetchedPod := corev1.Pod{} - // the pod might be in terminating state, we need to check - if errPod := r.Get(ctx, podIdentifier, &fetchedPod); errPod == nil { - if fetchedPod.DeletionTimestamp != nil { - logger.Error(errPod, "create pod error!", "pod is in a terminating state, we will wait until it is cleaned up", podIdentifier) - return err - } - } - logger.Info("Creating pod", "Pod already exists", pod.Name) - } else { - return err - } + r.Recorder.Eventf(&instance, corev1.EventTypeWarning, string(utils.FailedToCreateHeadPod), "Failed to create head Pod %s/%s, %v", pod.Namespace, pod.Name, err) + return err } - r.Recorder.Eventf(&instance, corev1.EventTypeNormal, "Created", "Created head pod %s", pod.Name) + r.rayClusterScaleExpectation.ExpectScalePod(pod.Namespace, instance.Name, expectations.HeadGroup, pod.Name, expectations.Create) + logger.Info("Created head Pod for RayCluster", "name", pod.Name) + r.Recorder.Eventf(&instance, corev1.EventTypeNormal, string(utils.CreatedHeadPod), "Created head Pod %s/%s", pod.Namespace, pod.Name) return nil } @@ -1049,13 +1138,9 @@ func (r *RayClusterReconciler) createWorkerPod(ctx context.Context, instance ray // build the pod then create it pod := r.buildWorkerPod(ctx, instance, worker) - podIdentifier := types.NamespacedName{ - Name: pod.Name, - Namespace: pod.Namespace, - } - if EnableBatchScheduler { - if scheduler, err := r.BatchSchedulerMgr.GetSchedulerForCluster(&instance); err == nil { - 
scheduler.AddMetadataToPod(&instance, worker.GroupName, &pod) + if r.BatchSchedulerMgr != nil { + if scheduler, err := r.BatchSchedulerMgr.GetSchedulerForCluster(); err == nil { + scheduler.AddMetadataToPod(ctx, &instance, worker.GroupName, &pod) } else { return err } @@ -1063,35 +1148,23 @@ func (r *RayClusterReconciler) createWorkerPod(ctx context.Context, instance ray replica := pod if err := r.Create(ctx, &replica); err != nil { - if errors.IsAlreadyExists(err) { - fetchedPod := corev1.Pod{} - // the pod might be in terminating state, we need to check - if errPod := r.Get(ctx, podIdentifier, &fetchedPod); errPod == nil { - if fetchedPod.DeletionTimestamp != nil { - logger.Error(errPod, "create pod error!", "pod is in a terminating state, we will wait until it is cleaned up", podIdentifier) - return err - } - } - logger.Info("Creating pod", "Pod already exists", pod.Name) - } else { - logger.Error(fmt.Errorf("createWorkerPod error"), "error creating pod", "pod", pod, "err = ", err) - return err - } + r.Recorder.Eventf(&instance, corev1.EventTypeWarning, string(utils.FailedToCreateWorkerPod), "Failed to create worker Pod %s/%s, %v", pod.Namespace, pod.Name, err) + return err } - logger.Info("Created pod", "Pod ", pod.GenerateName) - r.Recorder.Eventf(&instance, corev1.EventTypeNormal, "Created", "Created worker pod %s", pod.Name) + r.rayClusterScaleExpectation.ExpectScalePod(replica.Namespace, instance.Name, worker.GroupName, replica.Name, expectations.Create) + logger.Info("Created worker Pod for RayCluster", "name", pod.Name) + r.Recorder.Eventf(&instance, corev1.EventTypeNormal, string(utils.CreatedWorkerPod), "Created worker Pod %s/%s", pod.Namespace, pod.Name) return nil } // Build head instance pod(s). 
func (r *RayClusterReconciler) buildHeadPod(ctx context.Context, instance rayv1.RayCluster) corev1.Pod { logger := ctrl.LoggerFrom(ctx) - podName := strings.ToLower(instance.Name + utils.DashSymbol + string(rayv1.HeadNode) + utils.DashSymbol) - podName = utils.CheckName(podName) // making sure the name is valid + podName := utils.PodGenerateName(instance.Name, rayv1.HeadNode) fqdnRayIP := utils.GenerateFQDNServiceName(ctx, instance, instance.Namespace) // Fully Qualified Domain Name // The Ray head port used by workers to connect to the cluster (GCS server port for Ray >= 1.11.0, Redis port for older Ray.) headPort := common.GetHeadPort(instance.Spec.HeadGroupSpec.RayStartParams) - autoscalingEnabled := instance.Spec.EnableInTreeAutoscaling + autoscalingEnabled := utils.IsAutoscalingEnabled(&instance) podConf := common.DefaultHeadPodTemplate(ctx, instance, instance.Spec.HeadGroupSpec, podName, headPort) if len(r.headSidecarContainers) > 0 { podConf.Spec.Containers = append(podConf.Spec.Containers, r.headSidecarContainers...) @@ -1114,13 +1187,12 @@ func getCreatorCRDType(instance rayv1.RayCluster) utils.CRDType { // Build worker instance pods. func (r *RayClusterReconciler) buildWorkerPod(ctx context.Context, instance rayv1.RayCluster, worker rayv1.WorkerGroupSpec) corev1.Pod { logger := ctrl.LoggerFrom(ctx) - podName := strings.ToLower(instance.Name + utils.DashSymbol + string(rayv1.WorkerNode) + utils.DashSymbol + worker.GroupName + utils.DashSymbol) - podName = utils.CheckName(podName) // making sure the name is valid + podName := utils.PodGenerateName(fmt.Sprintf("%s-%s", instance.Name, worker.GroupName), rayv1.WorkerNode) fqdnRayIP := utils.GenerateFQDNServiceName(ctx, instance, instance.Namespace) // Fully Qualified Domain Name // The Ray head port used by workers to connect to the cluster (GCS server port for Ray >= 1.11.0, Redis port for older Ray.) 
headPort := common.GetHeadPort(instance.Spec.HeadGroupSpec.RayStartParams) - autoscalingEnabled := instance.Spec.EnableInTreeAutoscaling + autoscalingEnabled := utils.IsAutoscalingEnabled(&instance) podTemplateSpec := common.DefaultWorkerPodTemplate(ctx, instance, worker, podName, fqdnRayIP, headPort) if len(r.workerSidecarContainers) > 0 { podTemplateSpec.Spec.Containers = append(podTemplateSpec.Spec.Containers, r.workerSidecarContainers...) @@ -1153,8 +1225,12 @@ func (r *RayClusterReconciler) buildRedisCleanupJob(ctx context.Context, instanc "import sys; " + "redis_address = os.getenv('RAY_REDIS_ADDRESS', '').split(',')[0]; " + "redis_address = redis_address if '://' in redis_address else 'redis://' + redis_address; " + - "parsed = urlparse(redis_address); " + - "sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, password=os.getenv('REDIS_PASSWORD', parsed.password), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"", + "parsed = urlparse(redis_address); ", + } + if utils.EnvVarExists(utils.REDIS_USERNAME, pod.Spec.Containers[utils.RayContainerIndex].Env) { + pod.Spec.Containers[utils.RayContainerIndex].Args[0] += "sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, username=os.getenv('REDIS_USERNAME', parsed.username), password=os.getenv('REDIS_PASSWORD', parsed.password or ''), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"" + } else { + pod.Spec.Containers[utils.RayContainerIndex].Args[0] += "sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, password=os.getenv('REDIS_PASSWORD', parsed.password or ''), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"" } // Disable liveness and readiness probes because the Job will not launch processes like Raylet and GCS. 
@@ -1195,13 +1271,13 @@ func (r *RayClusterReconciler) buildRedisCleanupJob(ctx context.Context, instanc Annotations: pod.Annotations, }, Spec: batchv1.JobSpec{ - BackoffLimit: pointer.Int32(0), + BackoffLimit: ptr.To[int32](0), Template: corev1.PodTemplateSpec{ ObjectMeta: pod.ObjectMeta, Spec: pod.Spec, }, // make this job be best-effort only for 5 minutes. - ActiveDeadlineSeconds: pointer.Int64(300), + ActiveDeadlineSeconds: ptr.To[int64](300), }, } @@ -1223,8 +1299,8 @@ func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager, reconcileConcu Owns(&corev1.Pod{}). Owns(&corev1.Service{}) - if EnableBatchScheduler { - b = batchscheduler.ConfigureReconciler(b) + if r.BatchSchedulerMgr != nil { + r.BatchSchedulerMgr.ConfigureReconciler(b) } return b. @@ -1241,19 +1317,42 @@ func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager, reconcileConcu Complete(r) } -func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *rayv1.RayCluster) (*rayv1.RayCluster, error) { +func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *rayv1.RayCluster, reconcileErr error) (*rayv1.RayCluster, error) { + // TODO: Replace this log and use reconcileErr to set the condition field. + logger := ctrl.LoggerFrom(ctx) + if reconcileErr != nil { + logger.Info("Reconciliation error", "error", reconcileErr) + } + // Deep copy the instance, so we don't mutate the original object. 
newInstance := instance.DeepCopy() + statusConditionGateEnabled := features.Enabled(features.RayClusterStatusConditions) + if statusConditionGateEnabled { + if reconcileErr != nil { + if reason := utils.RayClusterReplicaFailureReason(reconcileErr); reason != "" { + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterReplicaFailure), + Status: metav1.ConditionTrue, + Reason: reason, + Message: reconcileErr.Error(), + }) + } + } else { + // if reconcileErr == nil, we can safely remove the RayClusterReplicaFailure condition. + meta.RemoveStatusCondition(&newInstance.Status.Conditions, string(rayv1.RayClusterReplicaFailure)) + } + } + // TODO (kevin85421): ObservedGeneration should be used to determine whether to update this CR or not. newInstance.Status.ObservedGeneration = newInstance.ObjectMeta.Generation runtimePods := corev1.PodList{} - filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: newInstance.Name} - if err := r.List(ctx, &runtimePods, client.InNamespace(newInstance.Namespace), filterLabels); err != nil { + if err := r.List(ctx, &runtimePods, common.RayClusterAllPodsAssociationOptions(newInstance).ToListOptions()...); err != nil { return nil, err } + newInstance.Status.ReadyWorkerReplicas = utils.CalculateReadyReplicas(runtimePods) newInstance.Status.AvailableWorkerReplicas = utils.CalculateAvailableReplicas(runtimePods) newInstance.Status.DesiredWorkerReplicas = utils.CalculateDesiredReplicas(ctx, newInstance) newInstance.Status.MinWorkerReplicas = utils.CalculateMinReplicas(newInstance) @@ -1265,22 +1364,104 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra newInstance.Status.DesiredGPU = sumGPUs(totalResources) newInstance.Status.DesiredTPU = totalResources[corev1.ResourceName("google.com/tpu")] - // validation for the RayStartParam for the state. 
- isValid, err := common.ValidateHeadRayStartParams(ctx, newInstance.Spec.HeadGroupSpec) - if err != nil { - r.Recorder.Event(newInstance, corev1.EventTypeWarning, string(rayv1.RayConfigError), err.Error()) - } - // only in invalid status that we update the status to unhealthy. - if !isValid { - newInstance.Status.State = rayv1.Unhealthy - } else { + if reconcileErr == nil && len(runtimePods.Items) == int(newInstance.Status.DesiredWorkerReplicas)+1 { // workers + 1 head if utils.CheckAllPodsRunning(ctx, runtimePods) { - newInstance.Status.State = rayv1.Ready + newInstance.Status.State = rayv1.Ready //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + newInstance.Status.Reason = "" + } + } + + // Check if the head node is running and ready by checking the head pod's status or if the cluster has been suspended. + if statusConditionGateEnabled { + headPod, err := common.GetRayClusterHeadPod(ctx, r, newInstance) + if err != nil { + return nil, err + } + // GetRayClusterHeadPod can return nil, nil when pod is not found, we handle it separately. + if headPod == nil { + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.HeadPodReady), + Status: metav1.ConditionFalse, + Reason: rayv1.HeadPodNotFound, + Message: "Head Pod not found", + }) + } else { + headPodReadyCondition := utils.FindHeadPodReadyCondition(headPod) + meta.SetStatusCondition(&newInstance.Status.Conditions, headPodReadyCondition) + } + + suspendStatus := utils.FindRayClusterSuspendStatus(newInstance) + if !meta.IsStatusConditionTrue(newInstance.Status.Conditions, string(rayv1.RayClusterProvisioned)) && suspendStatus != rayv1.RayClusterSuspended { + // RayClusterProvisioned indicates whether all Ray Pods are ready when the RayCluster is first created. + // Note RayClusterProvisioned StatusCondition will not be updated after all Ray Pods are ready for the first time. Unless the cluster has been suspended. 
+ if utils.CheckAllPodsRunning(ctx, runtimePods) { + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterProvisioned), + Status: metav1.ConditionTrue, + Reason: rayv1.AllPodRunningAndReadyFirstTime, + Message: "All Ray Pods are ready for the first time", + }) + } else { + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterProvisioned), + Status: metav1.ConditionFalse, + Reason: rayv1.RayClusterPodsProvisioning, + Message: "RayCluster Pods are being provisioned for first time", + }) + } + } + + if suspendStatus == rayv1.RayClusterSuspending { + if len(runtimePods.Items) == 0 { + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterProvisioned), + Status: metav1.ConditionFalse, + Reason: rayv1.RayClusterPodsProvisioning, + Message: "RayCluster has been suspended", + }) + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterSuspending), + Reason: string(rayv1.RayClusterSuspending), + Status: metav1.ConditionFalse, + }) + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterSuspended), + Reason: string(rayv1.RayClusterSuspended), + Status: metav1.ConditionTrue, + }) + } + } else if suspendStatus == rayv1.RayClusterSuspended { + if instance.Spec.Suspend != nil && !*instance.Spec.Suspend { + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterSuspended), + Reason: string(rayv1.RayClusterSuspended), + Status: metav1.ConditionFalse, + }) + } + } else { + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterSuspended), + Reason: string(rayv1.RayClusterSuspended), + Status: metav1.ConditionFalse, + }) + if instance.Spec.Suspend != nil && *instance.Spec.Suspend { + 
meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterSuspending), + Reason: string(rayv1.RayClusterSuspending), + Status: metav1.ConditionTrue, + }) + } else { + meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{ + Type: string(rayv1.RayClusterSuspending), + Reason: string(rayv1.RayClusterSuspending), + Status: metav1.ConditionFalse, + }) + } } } if newInstance.Spec.Suspend != nil && *newInstance.Spec.Suspend && len(runtimePods.Items) == 0 { - newInstance.Status.State = rayv1.Suspended + newInstance.Status.State = rayv1.Suspended //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } if err := r.updateEndpoints(ctx, newInstance); err != nil { @@ -1294,41 +1475,40 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra timeNow := metav1.Now() newInstance.Status.LastUpdateTime = &timeNow - return newInstance, nil -} - -// Best effort to obtain the ip of the head node. -func (r *RayClusterReconciler) getHeadPodIP(ctx context.Context, instance *rayv1.RayCluster) (string, error) { - logger := ctrl.LoggerFrom(ctx) - - runtimePods := corev1.PodList{} - filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name, utils.RayNodeTypeLabelKey: string(rayv1.HeadNode)} - if err := r.List(ctx, &runtimePods, client.InNamespace(instance.Namespace), filterLabels); err != nil { - logger.Error(err, "Failed to list pods while getting head pod ip.") - return "", err - } - if len(runtimePods.Items) != 1 { - logger.Info(fmt.Sprintf("Found %d head pods. 
cluster name %s, filter labels %v", len(runtimePods.Items), instance.Name, filterLabels)) - return "", nil + if instance.Status.State != newInstance.Status.State { //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + if newInstance.Status.StateTransitionTimes == nil { + newInstance.Status.StateTransitionTimes = make(map[rayv1.ClusterState]*metav1.Time) + } + newInstance.Status.StateTransitionTimes[newInstance.Status.State] = &timeNow //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } - return runtimePods.Items[0].Status.PodIP, nil + + return newInstance, nil } -func (r *RayClusterReconciler) getHeadServiceIP(ctx context.Context, instance *rayv1.RayCluster) (string, error) { +func (r *RayClusterReconciler) getHeadServiceIPAndName(ctx context.Context, instance *rayv1.RayCluster) (string, string, error) { runtimeServices := corev1.ServiceList{} - filterLabels := client.MatchingLabels(common.HeadServiceLabels(*instance)) - if err := r.List(ctx, &runtimeServices, client.InNamespace(instance.Namespace), filterLabels); err != nil { - return "", err + if err := r.List(ctx, &runtimeServices, common.RayClusterHeadServiceListOptions(instance)...); err != nil { + return "", "", err } if len(runtimeServices.Items) < 1 { - return "", fmt.Errorf("unable to find head service. cluster name %s, filter labels %v", instance.Name, filterLabels) + return "", "", fmt.Errorf("unable to find head service. cluster name %s, filter labels %v", instance.Name, common.RayClusterHeadServiceListOptions(instance)) } else if len(runtimeServices.Items) > 1 { - return "", fmt.Errorf("found multiple head services. cluster name %s, filter labels %v", instance.Name, filterLabels) + return "", "", fmt.Errorf("found multiple head services. cluster name %s, filter labels %v", instance.Name, common.RayClusterHeadServiceListOptions(instance)) } else if runtimeServices.Items[0].Spec.ClusterIP == "" { - return "", fmt.Errorf("head service IP is empty. 
cluster name %s, filter labels %v", instance.Name, filterLabels) + return "", "", fmt.Errorf("head service IP is empty. cluster name %s, filter labels %v", instance.Name, common.RayClusterHeadServiceListOptions(instance)) + } else if runtimeServices.Items[0].Spec.ClusterIP == corev1.ClusterIPNone { + // We return Head Pod IP if the Head service is headless. + headPod, err := common.GetRayClusterHeadPod(ctx, r, instance) + if err != nil { + return "", "", err + } + if headPod != nil { + return headPod.Status.PodIP, runtimeServices.Items[0].Name, nil + } + return "", runtimeServices.Items[0].Name, nil } - return runtimeServices.Items[0].Spec.ClusterIP, nil + return runtimeServices.Items[0].Spec.ClusterIP, runtimeServices.Items[0].Name, nil } func (r *RayClusterReconciler) updateEndpoints(ctx context.Context, instance *rayv1.RayCluster) error { @@ -1337,11 +1517,8 @@ func (r *RayClusterReconciler) updateEndpoints(ctx context.Context, instance *ra // We assume we can find the right one by filtering Services with appropriate label selectors // and picking the first one. We may need to select by name in the future if the Service naming is stable. rayHeadSvc := corev1.ServiceList{} - filterLabels := client.MatchingLabels{ - utils.RayClusterLabelKey: instance.Name, - utils.RayNodeTypeLabelKey: "head", - } - if err := r.List(ctx, &rayHeadSvc, client.InNamespace(instance.Namespace), filterLabels); err != nil { + filterLabels := common.RayClusterHeadServiceListOptions(instance) + if err := r.List(ctx, &rayHeadSvc, filterLabels...); err != nil { return err } @@ -1352,7 +1529,7 @@ func (r *RayClusterReconciler) updateEndpoints(ctx context.Context, instance *ra } for _, port := range svc.Spec.Ports { if len(port.Name) == 0 { - logger.Info("updateStatus", "service port's name is empty. Not adding it to RayCluster status.endpoints", port) + logger.Info("updateStatus: Service port's name is empty. 
Not adding it to RayCluster status.endpoints", "port", port) continue } if port.NodePort != 0 { @@ -1362,35 +1539,42 @@ func (r *RayClusterReconciler) updateEndpoints(ctx context.Context, instance *ra } else if port.TargetPort.StrVal != "" { instance.Status.Endpoints[port.Name] = port.TargetPort.StrVal } else { - logger.Info("updateStatus", "service port's targetPort is empty. Not adding it to RayCluster status.endpoints", port) + logger.Info("updateStatus: Service port's targetPort is empty. Not adding it to RayCluster status.endpoints", "port", port) } } } else { - logger.Info("updateEndpoints", "unable to find a Service for this RayCluster. Not adding RayCluster status.endpoints", instance.Name, "Service selectors", filterLabels) + logger.Info("updateEndpoints: Unable to find a Service for this RayCluster. Not adding RayCluster status.endpoints", "serviceSelectors", filterLabels) } return nil } func (r *RayClusterReconciler) updateHeadInfo(ctx context.Context, instance *rayv1.RayCluster) error { - if ip, err := r.getHeadPodIP(ctx, instance); err != nil { + headPod, err := common.GetRayClusterHeadPod(ctx, r, instance) + if err != nil { return err + } + if headPod != nil { + instance.Status.Head.PodIP = headPod.Status.PodIP + instance.Status.Head.PodName = headPod.Name } else { - instance.Status.Head.PodIP = ip + instance.Status.Head.PodIP = "" + instance.Status.Head.PodName = "" } - if ip, err := r.getHeadServiceIP(ctx, instance); err != nil { + ip, name, err := r.getHeadServiceIPAndName(ctx, instance) + if err != nil { return err - } else { - instance.Status.Head.ServiceIP = ip } + instance.Status.Head.ServiceIP = ip + instance.Status.Head.ServiceName = name return nil } func (r *RayClusterReconciler) reconcileAutoscalerServiceAccount(ctx context.Context, instance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) - if instance.Spec.EnableInTreeAutoscaling == nil || !*instance.Spec.EnableInTreeAutoscaling { + if !utils.IsAutoscalingEnabled(instance) { 
return nil } @@ -1407,10 +1591,11 @@ func (r *RayClusterReconciler) reconcileAutoscalerServiceAccount(ctx context.Con // zero-downtime rolling updates when RayService is performed. See https://github.com/ray-project/kuberay/issues/1123 // for more details. if instance.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == namespacedName.Name { - logger.Error(err, fmt.Sprintf( - "If users specify ServiceAccountName for the head Pod, they need to create a ServiceAccount themselves. "+ - "However, ServiceAccount %s is not found. Please create one. "+ - "See the PR description of https://github.com/ray-project/kuberay/pull/1128 for more details.", namespacedName.Name), "ServiceAccount", namespacedName) + actionableMessage := fmt.Sprintf("If users specify ServiceAccountName for the head Pod, they need to create a ServiceAccount themselves. "+ + "However, ServiceAccount %s is not found. Please create one. See the PR description of https://github.com/ray-project/kuberay/pull/1128 for more details.", namespacedName.Name) + + logger.Error(err, actionableMessage) + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.AutoscalerServiceAccountNotFound), "Failed to reconcile RayCluster %s/%s. 
%s", instance.Namespace, instance.Name, actionableMessage) return err } @@ -1433,11 +1618,11 @@ func (r *RayClusterReconciler) reconcileAutoscalerServiceAccount(ctx context.Con logger.Info("Pod service account already exist, no need to create") return nil } - logger.Error(err, "Pod Service Account create error!", "Pod.ServiceAccount.Error", err) + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToCreateServiceAccount), "Failed creating service account %s/%s, %v", serviceAccount.Namespace, serviceAccount.Name, err) return err } - logger.Info("Pod ServiceAccount created successfully", "service account name", serviceAccount.Name) - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Created", "Created service account %s", serviceAccount.Name) + logger.Info("Created service account for Ray Autoscaler", "name", serviceAccount.Name) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.CreatedServiceAccount), "Created service account %s/%s", serviceAccount.Namespace, serviceAccount.Name) return nil } @@ -1446,7 +1631,7 @@ func (r *RayClusterReconciler) reconcileAutoscalerServiceAccount(ctx context.Con func (r *RayClusterReconciler) reconcileAutoscalerRole(ctx context.Context, instance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) - if instance.Spec.EnableInTreeAutoscaling == nil || !*instance.Spec.EnableInTreeAutoscaling { + if !utils.IsAutoscalingEnabled(instance) { return nil } @@ -1475,11 +1660,11 @@ func (r *RayClusterReconciler) reconcileAutoscalerRole(ctx context.Context, inst logger.Info("role already exist, no need to create") return nil } - logger.Error(err, "Role create error!", "Role.Error", err) + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToCreateRole), "Failed creating role %s/%s, %v", role.Namespace, role.Name, err) return err } - logger.Info("Role created successfully", "role name", role.Name) - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Created", "Created role %s", 
role.Name) + logger.Info("Created role for Ray Autoscaler", "name", role.Name) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.CreatedRole), "Created role %s/%s", role.Namespace, role.Name) return nil } @@ -1488,7 +1673,7 @@ func (r *RayClusterReconciler) reconcileAutoscalerRole(ctx context.Context, inst func (r *RayClusterReconciler) reconcileAutoscalerRoleBinding(ctx context.Context, instance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) - if instance.Spec.EnableInTreeAutoscaling == nil || !*instance.Spec.EnableInTreeAutoscaling { + if !utils.IsAutoscalingEnabled(instance) { return nil } @@ -1517,35 +1702,31 @@ func (r *RayClusterReconciler) reconcileAutoscalerRoleBinding(ctx context.Contex logger.Info("role binding already exist, no need to create") return nil } - logger.Error(err, "Role binding create error!", "RoleBinding.Error", err) + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToCreateRoleBinding), "Failed creating role binding %s/%s, %v", roleBinding.Namespace, roleBinding.Name, err) return err } - logger.Info("RoleBinding created successfully", "role binding name", roleBinding.Name) - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "Created", "Created role binding %s", roleBinding.Name) + logger.Info("Created role binding for Ray Autoscaler", "name", roleBinding.Name) + r.Recorder.Eventf(instance, corev1.EventTypeNormal, string(utils.CreatedRoleBinding), "Created role binding %s/%s", roleBinding.Namespace, roleBinding.Name) return nil } return nil } -func (r *RayClusterReconciler) updateClusterState(ctx context.Context, instance *rayv1.RayCluster, clusterState rayv1.ClusterState) error { +// updateRayClusterStatus updates the RayCluster status if it is inconsistent with the old status and returns a bool to indicate the inconsistency. +// We rely on the returning bool to requeue the reconciliation for atomic operations, such as suspending a RayCluster. 
+func (r *RayClusterReconciler) updateRayClusterStatus(ctx context.Context, originalRayClusterInstance, newInstance *rayv1.RayCluster) (bool, error) { logger := ctrl.LoggerFrom(ctx) - if instance.Status.State == clusterState { - return nil + inconsistent := r.inconsistentRayClusterStatus(ctx, originalRayClusterInstance.Status, newInstance.Status) + if !inconsistent { + return inconsistent, nil } - instance.Status.State = clusterState - logger.Info("updateClusterState", "Update CR Status.State", clusterState) - return r.Status().Update(ctx, instance) -} - -func (r *RayClusterReconciler) updateClusterReason(ctx context.Context, instance *rayv1.RayCluster, clusterReason string) error { - logger := ctrl.LoggerFrom(ctx) - if instance.Status.Reason == clusterReason { - return nil + logger.Info("updateRayClusterStatus", "name", originalRayClusterInstance.Name, "old status", originalRayClusterInstance.Status, "new status", newInstance.Status) + err := r.Status().Update(ctx, newInstance) + if err != nil { + logger.Info("Error updating status", "name", originalRayClusterInstance.Name, "error", err, "RayCluster", newInstance) } - instance.Status.Reason = clusterReason - logger.Info("updateClusterReason", "Update CR Status.Reason", clusterReason) - return r.Status().Update(ctx, instance) + return inconsistent, err } // sumGPUs sums the GPUs in the given resource list. 
diff --git a/ray-operator/controllers/ray/raycluster_controller_test.go b/ray-operator/controllers/ray/raycluster_controller_test.go index 940590c0da5..2ad08c07059 100644 --- a/ray-operator/controllers/ray/raycluster_controller_test.go +++ b/ray-operator/controllers/ray/raycluster_controller_test.go @@ -17,20 +17,28 @@ package ray import ( "context" + "errors" "fmt" "time" + "k8s.io/apimachinery/pkg/api/meta" + + "github.com/ray-project/kuberay/ray-operator/pkg/features" + "github.com/ray-project/kuberay/ray-operator/test/support" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" @@ -52,7 +60,7 @@ func rayClusterTemplate(name string, namespace string) *rayv1.RayCluster { Containers: []corev1.Container{ { Name: "ray-head", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), }, }, }, @@ -60,9 +68,9 @@ func rayClusterTemplate(name string, namespace string) *rayv1.RayCluster { }, WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(4), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](4), GroupName: "small-group", RayStartParams: map[string]string{}, Template: corev1.PodTemplateSpec{ @@ -70,7 +78,7 @@ func rayClusterTemplate(name string, namespace string) *rayv1.RayCluster { Containers: []corev1.Container{ { Name: "ray-worker", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), }, }, }, @@ -82,7 +90,7 @@ func rayClusterTemplate(name string, namespace string) 
*rayv1.RayCluster { } var _ = Context("Inside the default namespace", func() { - Describe("Static RayCluster", func() { + Describe("Static RayCluster", Ordered, func() { ctx := context.Background() namespace := "default" rayCluster := rayClusterTemplate("raycluster-static", namespace) @@ -97,9 +105,9 @@ var _ = Context("Inside the default namespace", func() { // (1) Ray Autoscaler is disabled. // (2) There is only one worker group, and its `replicas` is set to 3, and `maxReplicas` is set to 4, and `workersToDelete` is empty. Expect(rayCluster.Spec.EnableInTreeAutoscaling).To(BeNil()) - Expect(len(rayCluster.Spec.WorkerGroupSpecs)).To(Equal(1)) - Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(pointer.Int32(3))) - Expect(rayCluster.Spec.WorkerGroupSpecs[0].MaxReplicas).To(Equal(pointer.Int32(4))) + Expect(rayCluster.Spec.WorkerGroupSpecs).To(HaveLen(1)) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(ptr.To[int32](3))) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].MaxReplicas).To(Equal(ptr.To[int32](4))) Expect(rayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete).To(BeEmpty()) }) @@ -133,11 +141,11 @@ var _ = Context("Inside the default namespace", func() { It("Create a head Pod resource with default sidecars", func() { err := k8sClient.List(ctx, &headPods, headFilters...) 
Expect(err).NotTo(HaveOccurred(), "Failed to list head Pods") - Expect(len(headPods.Items)).Should(Equal(1), "headPods: %v", headPods.Items) + Expect(headPods.Items).Should(HaveLen(1), "headPods: %v", headPods.Items) headPod = headPods.Items[0] Expect(headPod.Spec.Containers[len(headPod.Spec.Containers)-1].Name).Should(Equal("fluentbit"), "fluentbit sidecar exists") - Expect(len(headPod.Spec.Containers)).Should(Equal(2), "Because we disable autoscaling and inject a FluentBit sidecar, the head Pod should have 2 containers") + Expect(headPod.Spec.Containers).Should(HaveLen(2), "Because we disable autoscaling and inject a FluentBit sidecar, the head Pod should have 2 containers") }) It("Update all Pods to Running", func() { @@ -150,21 +158,19 @@ var _ = Context("Inside the default namespace", func() { // Note that this test assumes that headPods and workerPods are up-to-date. for _, headPod := range headPods.Items { headPod.Status.Phase = corev1.PodRunning - Expect(k8sClient.Status().Update(ctx, &headPod)).Should(BeNil()) + Expect(k8sClient.Status().Update(ctx, &headPod)).Should(Succeed()) } Eventually( - isAllPodsRunningByFilters(ctx, headPods, headFilters...), - time.Second*3, time.Millisecond*500).Should(Equal(true), "Head Pod should be running.") + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(headPods, headFilters).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(BeTrue(), "Head Pod should be running.") for _, workerPod := range workerPods.Items { workerPod.Status.Phase = corev1.PodRunning - Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(BeNil()) + Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed()) } Eventually( - isAllPodsRunningByFilters(ctx, workerPods, workerFilters...), - time.Second*3, time.Millisecond*500).Should(Equal(true), "All worker Pods should be running.") + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(workerPods, 
workerFilters).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(BeTrue(), "All worker Pods should be running.") }) It("RayCluster's .status.state should be updated to 'ready' shortly after all Pods are Running", func() { @@ -174,6 +180,13 @@ var _ = Context("Inside the default namespace", func() { Eventually( getClusterState(ctx, namespace, rayCluster.Name), time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + // Check that the StateTransitionTimes are set. + Eventually( + func() *metav1.Time { + status := getClusterStatus(ctx, namespace, rayCluster.Name)() + return status.StateTransitionTimes[rayv1.Ready] + }, + time.Second*3, time.Millisecond*500).Should(Not(BeNil())) }) // The following tests focus on checking whether KubeRay creates the correct number of Pods. @@ -184,7 +197,7 @@ var _ = Context("Inside the default namespace", func() { time.Second*3, time.Millisecond*500).Should(Equal(numWorkerPods), fmt.Sprintf("workerGroup %v", workerPods.Items)) pod := workerPods.Items[0] - err := k8sClient.Delete(ctx, &pod, &client.DeleteOptions{GracePeriodSeconds: pointer.Int64(0)}) + err := k8sClient.Delete(ctx, &pod, &client.DeleteOptions{GracePeriodSeconds: ptr.To[int64](0)}) Expect(err).NotTo(HaveOccurred(), "Failed to delete a Pod") Eventually( listResourceFunc(ctx, &workerPods, workerFilters...), @@ -197,7 +210,7 @@ var _ = Context("Inside the default namespace", func() { Eventually( getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), time.Second*3, time.Millisecond*500).Should(BeNil(), "rayCluster: %v", rayCluster) - rayCluster.Spec.WorkerGroupSpecs[0].Replicas = pointer.Int32(5) + rayCluster.Spec.WorkerGroupSpecs[0].Replicas = ptr.To[int32](5) // Operator may update revision after we get cluster earlier. Update may result in 409 conflict error. // We need to handle conflict error and retry the update. 
@@ -216,11 +229,199 @@ var _ = Context("Inside the default namespace", func() { }) }) - Describe("RayCluster with autoscaling enabled", func() { + Describe("RayCluster with overridden app.kubernetes.io labels", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayCluster := rayClusterTemplate("raycluster-overridden-k8s-labels", namespace) + rayCluster.Spec.HeadGroupSpec.Template.Labels = map[string]string{ + utils.KubernetesApplicationNameLabelKey: "myapp", + } + headPods := corev1.PodList{} + workerPods := corev1.PodList{} + workerFilters := common.RayClusterGroupPodsAssociationOptions(rayCluster, rayCluster.Spec.WorkerGroupSpecs[0].GroupName).ToListOptions() + headFilters := common.RayClusterHeadPodsAssociationOptions(rayCluster).ToListOptions() + + It("Verify RayCluster spec", func() { + // These test are designed based on the following assumptions: + // (1) The app.kubernetes.io/name label of the HeadGroupSpec is overridden. + // (2) There is only one worker group, and its `replicas` is set to 3. 
+ Expect(rayCluster.Spec.HeadGroupSpec.Template.Labels[utils.KubernetesApplicationNameLabelKey]).NotTo(BeEmpty()) + Expect(rayCluster.Spec.WorkerGroupSpecs).To(HaveLen(1)) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(ptr.To[int32](3))) + }) + + It("Create a RayCluster custom resource", func() { + err := k8sClient.Create(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + }) + + It("Check the number of head Pods", func() { + numHeadPods := 1 + Eventually( + listResourceFunc(ctx, &headPods, headFilters...), + time.Second*3, time.Millisecond*500).Should(Equal(numHeadPods), fmt.Sprintf("headGroup %v", headPods.Items)) + for _, head := range headPods.Items { + Expect(head.Labels[utils.KubernetesApplicationNameLabelKey]).To(Equal("myapp")) + } + }) + + It("Check the number of worker Pods", func() { + numWorkerPods := 3 + Eventually( + listResourceFunc(ctx, &workerPods, workerFilters...), + time.Second*3, time.Millisecond*500).Should(Equal(numWorkerPods), fmt.Sprintf("workerGroup %v", workerPods.Items)) + }) + + It("Update all Pods to Running", func() { + // We need to manually update Pod statuses otherwise they'll always be Pending. + // envtest doesn't create a full K8s cluster. It's only the control plane. + // There's no container runtime or any other K8s controllers. + // So Pods are created, but no controller updates them from Pending to Running. + // See https://book.kubebuilder.io/reference/envtest.html + + // Note that this test assumes that headPods and workerPods are up-to-date. + for _, headPod := range headPods.Items { + headPod.Status.Phase = corev1.PodRunning + headPod.Status.PodIP = "1.1.1.1" // This should be carried to rayCluster.Status.Head.ServiceIP. We check it later. 
+ Expect(k8sClient.Status().Update(ctx, &headPod)).Should(Succeed()) + } + + Eventually( + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(headPods, headFilters).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(BeTrue(), "Head Pod should be running.") + + for _, workerPod := range workerPods.Items { + workerPod.Status.Phase = corev1.PodRunning + Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed()) + } + + Eventually( + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(workerPods, workerFilters).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(BeTrue(), "All worker Pods should be running.") + }) + + It("RayCluster's .status.state and .status.head.ServiceIP should be updated shortly after all Pods are Running", func() { + // Note that RayCluster is `ready` when all Pods are Running and their PodReady conditions are true. + // However, in envtest, PodReady conditions are automatically set to true when Pod.Status.Phase is set to Running. + // We need to figure out the behavior. See https://github.com/ray-project/kuberay/issues/1736 for more details. + Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + // Check that the StateTransitionTimes are set. 
+ Eventually( + func() *metav1.Time { + status := getClusterStatus(ctx, namespace, rayCluster.Name)() + return status.StateTransitionTimes[rayv1.Ready] + }, + time.Second*3, time.Millisecond*500).Should(Not(BeNil())) + + Eventually(func() (string, error) { + err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)() + return rayCluster.Status.Head.ServiceIP, err + }, time.Second*3, time.Millisecond*500).Should(Equal("1.1.1.1"), "Should be able to see the rayCluster.Status.Head.ServiceIP: %v", rayCluster.Status.Head.ServiceIP) + }) + }) + + Describe("RayCluster with invalid overridden ray.io/cluster labels", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayCluster := rayClusterTemplate("raycluster-overridden-cluster-label", namespace) + rayCluster.Spec.HeadGroupSpec.Template.Labels = map[string]string{ + utils.RayClusterLabelKey: "invalid-cluster-name", + } + headPods := corev1.PodList{} + workerPods := corev1.PodList{} + workerFilters := common.RayClusterGroupPodsAssociationOptions(rayCluster, rayCluster.Spec.WorkerGroupSpecs[0].GroupName).ToListOptions() + headFilters := common.RayClusterHeadPodsAssociationOptions(rayCluster).ToListOptions() + + It("Verify RayCluster spec", func() { + // These test are designed based on the following assumptions: + // (1) The ray.io/cluster label of the HeadGroupSpec is overridden. + // (2) There is only one worker group, and its `replicas` is set to 3. 
+ Expect(rayCluster.Spec.HeadGroupSpec.Template.Labels[utils.RayClusterLabelKey]).NotTo(BeEmpty()) + Expect(rayCluster.Spec.WorkerGroupSpecs).To(HaveLen(1)) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(ptr.To[int32](3))) + }) + + It("Create a RayCluster custom resource", func() { + err := k8sClient.Create(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + }) + + It("Check the number of head Pods", func() { + numHeadPods := 1 + Eventually( + listResourceFunc(ctx, &headPods, headFilters...), + time.Second*3, time.Millisecond*500).Should(Equal(numHeadPods), fmt.Sprintf("headGroup %v, headFilters: %v", headPods.Items, headFilters)) + for _, head := range headPods.Items { + Expect(head.Labels[utils.RayClusterLabelKey]).To(Equal("raycluster-overridden-cluster-label")) + } + }) + + It("Check the number of worker Pods", func() { + numWorkerPods := 3 + Eventually( + listResourceFunc(ctx, &workerPods, workerFilters...), + time.Second*3, time.Millisecond*500).Should(Equal(numWorkerPods), fmt.Sprintf("workerGroup %v", workerPods.Items)) + }) + + It("Update all Pods to Running", func() { + // We need to manually update Pod statuses otherwise they'll always be Pending. + // envtest doesn't create a full K8s cluster. It's only the control plane. + // There's no container runtime or any other K8s controllers. + // So Pods are created, but no controller updates them from Pending to Running. + // See https://book.kubebuilder.io/reference/envtest.html + + // Note that this test assumes that headPods and workerPods are up-to-date. + for _, headPod := range headPods.Items { + headPod.Status.Phase = corev1.PodRunning + headPod.Status.PodIP = "1.1.1.1" // This should be carried to rayCluster.Status.Head.ServiceIP. 
We check it later. + Expect(k8sClient.Status().Update(ctx, &headPod)).Should(Succeed()) + } + + Eventually( + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(headPods, headFilters).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(BeTrue(), "Head Pod should be running.") + + for _, workerPod := range workerPods.Items { + workerPod.Status.Phase = corev1.PodRunning + Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed()) + } + + Eventually( + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(workerPods, workerFilters).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(BeTrue(), "All worker Pods should be running.") + }) + + It("RayCluster's .status.state and .status.head.ServiceIP should be updated shortly after all Pods are Running", func() { + // Note that RayCluster is `ready` when all Pods are Running and their PodReady conditions are true. + // However, in envtest, PodReady conditions are automatically set to true when Pod.Status.Phase is set to Running. + // We need to figure out the behavior. See https://github.com/ray-project/kuberay/issues/1736 for more details. + Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + // Check that the StateTransitionTimes are set. 
+ Eventually( + func() *metav1.Time { + status := getClusterStatus(ctx, namespace, rayCluster.Name)() + return status.StateTransitionTimes[rayv1.Ready] + }, + time.Second*3, time.Millisecond*500).Should(Not(BeNil())) + + Eventually(func() (string, error) { + err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)() + return rayCluster.Status.Head.ServiceIP, err + }, time.Second*3, time.Millisecond*500).Should(Equal("1.1.1.1"), "Should be able to see the rayCluster.Status.Head.ServiceIP: %v", rayCluster.Status.Head.ServiceIP) + }) + }) + + Describe("RayCluster with autoscaling enabled", Ordered, func() { ctx := context.Background() namespace := "default" rayCluster := rayClusterTemplate("raycluster-autoscaler", namespace) - rayCluster.Spec.EnableInTreeAutoscaling = pointer.Bool(true) + rayCluster.Spec.EnableInTreeAutoscaling = ptr.To(true) workerPods := corev1.PodList{} workerFilter := common.RayClusterGroupPodsAssociationOptions(rayCluster, rayCluster.Spec.WorkerGroupSpecs[0].GroupName).ToListOptions() @@ -228,10 +429,10 @@ var _ = Context("Inside the default namespace", func() { // These test are designed based on the following assumptions: // (1) Ray Autoscaler is enabled. // (2) There is only one worker group, and its `replicas` is set to 3, and `maxReplicas` is set to 4, and `workersToDelete` is empty. 
- Expect(*rayCluster.Spec.EnableInTreeAutoscaling).To(Equal(true)) - Expect(len(rayCluster.Spec.WorkerGroupSpecs)).To(Equal(1)) - Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(pointer.Int32(3))) - Expect(rayCluster.Spec.WorkerGroupSpecs[0].MaxReplicas).To(Equal(pointer.Int32(4))) + Expect(*rayCluster.Spec.EnableInTreeAutoscaling).To(BeTrue()) + Expect(rayCluster.Spec.WorkerGroupSpecs).To(HaveLen(1)) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(ptr.To[int32](3))) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].MaxReplicas).To(Equal(ptr.To[int32](4))) Expect(rayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete).To(BeEmpty()) }) @@ -276,7 +477,7 @@ var _ = Context("Inside the default namespace", func() { getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), time.Second*3, time.Millisecond*500).Should(BeNil()) podToDelete := workerPods.Items[0] - rayCluster.Spec.WorkerGroupSpecs[0].Replicas = pointer.Int32(2) + rayCluster.Spec.WorkerGroupSpecs[0].Replicas = ptr.To[int32](2) rayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{podToDelete.Name} return k8sClient.Update(ctx, rayCluster) }) @@ -289,7 +490,7 @@ var _ = Context("Inside the default namespace", func() { // Ray Autoscaler should clean up WorkersToDelete after scaling process has finished. // Call cleanUpWorkersToDelete to simulate the behavior of the Ray Autoscaler. 
- cleanUpWorkersToDelete(ctx, rayCluster, 0) + cleanUpWorkersToDelete(ctx, rayCluster) }) It("Simulate Ray Autoscaler scales up", func() { @@ -297,7 +498,7 @@ var _ = Context("Inside the default namespace", func() { Eventually( getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), time.Second*3, time.Millisecond*500).Should(BeNil()) - rayCluster.Spec.WorkerGroupSpecs[0].Replicas = pointer.Int32(4) + rayCluster.Spec.WorkerGroupSpecs[0].Replicas = ptr.To[int32](4) return k8sClient.Update(ctx, rayCluster) }) Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster custom resource") @@ -309,7 +510,43 @@ var _ = Context("Inside the default namespace", func() { }) }) - Describe("Suspend RayCluster", func() { + updateRayClusterSuspendField := func(ctx context.Context, rayCluster *rayv1.RayCluster, suspend bool) error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: rayCluster.Namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "rayCluster = %v", rayCluster) + rayCluster.Spec.Suspend = &suspend + return k8sClient.Update(ctx, rayCluster) + }) + } + + updateRayClusterWorkerGroupSuspendField := func(ctx context.Context, rayCluster *rayv1.RayCluster, suspend bool) error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: rayCluster.Namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "rayCluster = %v", rayCluster) + rayCluster.Spec.WorkerGroupSpecs[0].Suspend = &suspend + return k8sClient.Update(ctx, rayCluster) + }) + } + + findRayClusterSuspendStatus := func(ctx context.Context, rayCluster *rayv1.RayCluster) (rayv1.RayClusterConditionType, error) { + if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: rayCluster.Namespace}, 
rayCluster)(); err != nil { + return "", err + } + suspending := meta.IsStatusConditionTrue(rayCluster.Status.Conditions, string(rayv1.RayClusterSuspending)) + suspended := meta.IsStatusConditionTrue(rayCluster.Status.Conditions, string(rayv1.RayClusterSuspended)) + if suspending && suspended { + return "invalid", errors.New("invalid: rayv1.RayClusterSuspending and rayv1.RayClusterSuspended should not be both true") + } else if suspending { + return rayv1.RayClusterSuspending, nil + } else if suspended { + return rayv1.RayClusterSuspended, nil + } + return "", nil + } + + testSuspendRayCluster := func(withConditionDisabled bool) { ctx := context.Background() namespace := "default" rayCluster := rayClusterTemplate("raycluster-suspend", namespace) @@ -320,14 +557,21 @@ var _ = Context("Inside the default namespace", func() { headFilters := common.RayClusterHeadPodsAssociationOptions(rayCluster).ToListOptions() allFilters := common.RayClusterAllPodsAssociationOptions(rayCluster).ToListOptions() + BeforeAll(func() { + if withConditionDisabled { + cleanUpFunc := features.SetFeatureGateDuringTest(GinkgoTB(), features.RayClusterStatusConditions, false) + DeferCleanup(cleanUpFunc) + } + }) + It("Verify RayCluster spec", func() { // These test are designed based on the following assumptions: // (1) Ray Autoscaler is disabled. // (2) There is only one worker group, and its `replicas` is set to 3, and `maxReplicas` is set to 4, and `workersToDelete` is empty. 
Expect(rayCluster.Spec.EnableInTreeAutoscaling).To(BeNil()) - Expect(len(rayCluster.Spec.WorkerGroupSpecs)).To(Equal(1)) - Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(pointer.Int32(3))) - Expect(rayCluster.Spec.WorkerGroupSpecs[0].MaxReplicas).To(Equal(pointer.Int32(4))) + Expect(rayCluster.Spec.WorkerGroupSpecs).To(HaveLen(1)) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(ptr.To[int32](3))) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].MaxReplicas).To(Equal(ptr.To[int32](4))) Expect(rayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete).To(BeEmpty()) }) @@ -348,14 +592,7 @@ var _ = Context("Inside the default namespace", func() { It("Should delete all head and worker Pods if suspended", func() { // suspend a Raycluster and check that all Pods are deleted. - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - Eventually( - getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), - time.Second*3, time.Millisecond*500).Should(BeNil(), "rayCluster: %v", rayCluster) - suspend := true - rayCluster.Spec.Suspend = &suspend - return k8sClient.Update(ctx, rayCluster) - }) + err := updateRayClusterSuspendField(ctx, rayCluster, true) Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster") // Check that all Pods are deleted @@ -374,18 +611,19 @@ var _ = Context("Inside the default namespace", func() { Eventually( getClusterState(ctx, namespace, rayCluster.Name), time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Suspended)) + if !withConditionDisabled { + Eventually(findRayClusterSuspendStatus, time.Second*3, time.Millisecond*500). + WithArguments(ctx, rayCluster).Should(Equal(rayv1.RayClusterSuspended)) + Expect(meta.IsStatusConditionTrue(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned))).To(BeFalse()) + // rayCluster.Status.Head.PodName will be cleared. 
+ // rayCluster.Status.Head.PodIP will also be cleared, but we don't test it here since we don't have IPs in tests. + Expect(rayCluster.Status.Head.PodName).To(BeEmpty()) + } }) It("Set suspend to false and then revert it to true before all Pods are running", func() { // set suspend to false - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - Eventually( - getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), - time.Second*3, time.Millisecond*500).Should(BeNil(), "rayCluster = %v", rayCluster) - suspend := false - rayCluster.Spec.Suspend = &suspend - return k8sClient.Update(ctx, rayCluster) - }) + err := updateRayClusterSuspendField(ctx, rayCluster, false) Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster") // check that all Pods are created @@ -400,18 +638,11 @@ var _ = Context("Inside the default namespace", func() { // only update worker Pod statuses so that the head Pod status is still Pending. for _, workerPod := range workerPods.Items { workerPod.Status.Phase = corev1.PodRunning - Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(BeNil()) + Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed()) } // change suspend to true before all Pods are Running. 
- err = retry.RetryOnConflict(retry.DefaultRetry, func() error { - Eventually( - getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), - time.Second*3, time.Millisecond*500).Should(BeNil(), "rayCluster = %v", rayCluster) - suspend := true - rayCluster.Spec.Suspend = &suspend - return k8sClient.Update(ctx, rayCluster) - }) + err = updateRayClusterSuspendField(ctx, rayCluster, true) Expect(err).NotTo(HaveOccurred(), "Failed to update test RayCluster resource") // check that all Pods are deleted @@ -429,18 +660,16 @@ var _ = Context("Inside the default namespace", func() { Eventually( getClusterState(ctx, namespace, rayCluster.Name), time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Suspended)) + + if !withConditionDisabled { + Eventually(findRayClusterSuspendStatus, time.Second*3, time.Millisecond*500). + WithArguments(ctx, rayCluster).Should(Equal(rayv1.RayClusterSuspended)) + } }) It("Should run all head and worker pods if un-suspended", func() { // Resume the suspended RayCluster - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - Eventually( - getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), - time.Second*3, time.Millisecond*500).Should(BeNil(), "rayCluster = %v", rayCluster) - suspend := false - rayCluster.Spec.Suspend = &suspend - return k8sClient.Update(ctx, rayCluster) - }) + err := updateRayClusterSuspendField(ctx, rayCluster, false) Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster") // check that all pods are created @@ -456,11 +685,11 @@ var _ = Context("Inside the default namespace", func() { // This is because we don't run kubelets in the unit tests to update the status subresource. 
for _, headPod := range headPods.Items { headPod.Status.Phase = corev1.PodRunning - Expect(k8sClient.Status().Update(ctx, &headPod)).Should(BeNil()) + Expect(k8sClient.Status().Update(ctx, &headPod)).Should(Succeed()) } for _, workerPod := range workerPods.Items { workerPod.Status.Phase = corev1.PodRunning - Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(BeNil()) + Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed()) } }) @@ -468,16 +697,226 @@ var _ = Context("Inside the default namespace", func() { Eventually( getClusterState(ctx, namespace, rayCluster.Name), time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + if !withConditionDisabled { + Eventually(findRayClusterSuspendStatus, time.Second*3, time.Millisecond*500). + WithArguments(ctx, rayCluster).Should(BeEmpty()) + Expect(meta.IsStatusConditionTrue(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned))).To(BeTrue()) + // rayCluster.Status.Head.PodName should have a value now. + // rayCluster.Status.Head.PodIP should also have a value now, but we don't test it here since we don't have IPs in tests. 
+ Expect(rayCluster.Status.Head.PodName).NotTo(BeEmpty()) + } + }) + + It("Delete the cluster", func() { + err := k8sClient.Delete(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred()) }) + } + + Describe("Suspend RayCluster", Ordered, func() { + testSuspendRayCluster(true) }) - Describe("RayCluster with a multi-host worker group", func() { + Describe("Suspend RayCluster with Condition", Ordered, func() { + testSuspendRayCluster(false) + }) + + Describe("Suspend RayCluster atomically with Condition", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayCluster := rayClusterTemplate("raycluster-suspend-atomically", namespace) + allPods := corev1.PodList{} + allFilters := common.RayClusterAllPodsAssociationOptions(rayCluster).ToListOptions() + numPods := 4 // 1 Head + 3 Workers + + BeforeAll(func() { + Expect(features.Enabled(features.RayClusterStatusConditions)).To(BeTrue()) + }) + + It("Create a RayCluster custom resource", func() { + err := k8sClient.Create(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster") + Eventually(getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + }) + + It("Check the number of Pods and add finalizers", func() { + Eventually(listResourceFunc(ctx, &allPods, allFilters...), time.Second*3, time.Millisecond*500). + Should(Equal(numPods), fmt.Sprintf("all pods %v", allPods.Items)) + // Add finalizers to all Pods to prevent them from being deleted so that we can test if the status condition makes the suspending process atomic.
+ for _, pod := range allPods.Items { + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + pod.Finalizers = append(pod.Finalizers, "ray.io/deletion-blocker") + return k8sClient.Update(ctx, &pod) + }) + Expect(err).NotTo(HaveOccurred(), "Failed to update Pods") + } + }) + + It("Should turn on the RayClusterSuspending if we set `.Spec.Suspend` back to true", func() { + // suspend a Raycluster. + err := updateRayClusterSuspendField(ctx, rayCluster, true) + Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster") + + Eventually(findRayClusterSuspendStatus, time.Second*3, time.Millisecond*500). + WithArguments(ctx, rayCluster).Should(Equal(rayv1.RayClusterSuspending)) + }) + + It("Should keep RayClusterSuspending consistently if we set `.Spec.Suspend` back to false", func() { + err := updateRayClusterSuspendField(ctx, rayCluster, false) + Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster") + + Consistently(findRayClusterSuspendStatus, time.Second*3, time.Millisecond*500). + WithArguments(ctx, rayCluster).Should(Equal(rayv1.RayClusterSuspending)) + }) + + It("Pods should be deleted and new Pods should created back after we remove those finalizers", func() { + Eventually(listResourceFunc(ctx, &allPods, allFilters...), time.Second*3, time.Millisecond*500). + Should(Equal(numPods), fmt.Sprintf("all pods %v", allPods.Items)) + + var oldNames []string + for _, pod := range allPods.Items { + oldNames = append(oldNames, pod.Name) + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + pod.Finalizers = nil + return k8sClient.Update(ctx, &pod) + }) + Expect(err).NotTo(HaveOccurred(), "Failed to update Pods") + } + // RayClusterSuspending and RayClusterSuspended should be both false. + Eventually(findRayClusterSuspendStatus, time.Second*3, time.Millisecond*500). + WithArguments(ctx, rayCluster).Should(BeEmpty()) + Consistently(findRayClusterSuspendStatus, time.Second*3, time.Millisecond*500). 
+ WithArguments(ctx, rayCluster).Should(BeEmpty()) + + // New Pods should be created. + Eventually(listResourceFunc(ctx, &allPods, allFilters...), time.Second*3, time.Millisecond*500). + Should(Equal(numPods), fmt.Sprintf("all pods %v", allPods.Items)) + + var newNames []string + for _, pod := range allPods.Items { + newNames = append(newNames, pod.Name) + } + Expect(newNames).NotTo(ConsistOf(oldNames)) + }) + + It("Set suspend to true and all Pods should be deleted again", func() { + err := updateRayClusterSuspendField(ctx, rayCluster, true) + Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster") + + Eventually(listResourceFunc(ctx, &allPods, allFilters...), time.Second*3, time.Millisecond*500). + Should(Equal(0), fmt.Sprintf("all pods %v", allPods.Items)) + + Eventually(findRayClusterSuspendStatus, time.Second*3, time.Millisecond*500). + WithArguments(ctx, rayCluster).Should(Equal(rayv1.RayClusterSuspended)) + Consistently(findRayClusterSuspendStatus, time.Second*3, time.Millisecond*500). 
+ WithArguments(ctx, rayCluster).Should(Equal(rayv1.RayClusterSuspended)) + }) + + It("Delete the cluster", func() { + err := k8sClient.Delete(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred()) + }) + }) + + Describe("Suspend RayCluster worker group", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayCluster := rayClusterTemplate("raycluster-suspend-workergroup", namespace) + allPods := corev1.PodList{} + allFilters := common.RayClusterAllPodsAssociationOptions(rayCluster).ToListOptions() + workerFilters := common.RayClusterGroupPodsAssociationOptions(rayCluster, rayCluster.Spec.WorkerGroupSpecs[0].GroupName).ToListOptions() + headFilters := common.RayClusterHeadPodsAssociationOptions(rayCluster).ToListOptions() + + BeforeAll(func() { + DeferCleanup(features.SetFeatureGateDuringTest(GinkgoTB(), features.RayJobDeletionPolicy, true)) + }) + + It("Create a RayCluster custom resource", func() { + err := k8sClient.Create(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster") + Eventually(getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + }) + + It("Check the number of Pods and add finalizers", func() { + Eventually(listResourceFunc(ctx, &allPods, allFilters...), time.Second*3, time.Millisecond*500). + Should(Equal(4), fmt.Sprintf("all pods %v", allPods.Items)) + }) + + It("Setting suspend=true in first worker group should not fail", func() { + // suspend the Raycluster worker group + err := updateRayClusterWorkerGroupSuspendField(ctx, rayCluster, true) + Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster") + }) + + It("Worker pods should be deleted but head pod should still be running", func() { + Eventually(listResourceFunc(ctx, &allPods, workerFilters...), time.Second*5, time.Millisecond*500). 
+ Should(Equal(0), fmt.Sprintf("all pods %v", allPods.Items)) + Eventually(listResourceFunc(ctx, &allPods, headFilters...), time.Second*5, time.Millisecond*500). + Should(Equal(1), fmt.Sprintf("all pods %v", allPods.Items)) + Eventually(listResourceFunc(ctx, &allPods, allFilters...), time.Second*5, time.Millisecond*500). + Should(Equal(1), fmt.Sprintf("all pods %v", allPods.Items)) + }) + + It("Delete the cluster", func() { + err := k8sClient.Delete(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred()) + }) + }) + + Describe("Suspend RayCluster worker group with Autoscaler enabled", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayCluster := rayClusterTemplate("raycluster-suspend-workergroup-autoscaler", namespace) + rayCluster.Spec.EnableInTreeAutoscaling = ptr.To(true) + allPods := corev1.PodList{} + allFilters := common.RayClusterAllPodsAssociationOptions(rayCluster).ToListOptions() + workerFilters := common.RayClusterGroupPodsAssociationOptions(rayCluster, rayCluster.Spec.WorkerGroupSpecs[0].GroupName).ToListOptions() + headFilters := common.RayClusterHeadPodsAssociationOptions(rayCluster).ToListOptions() + + BeforeAll(func() { + DeferCleanup(features.SetFeatureGateDuringTest(GinkgoTB(), features.RayJobDeletionPolicy, true)) + }) + + It("Create a RayCluster custom resource", func() { + err := k8sClient.Create(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster") + Eventually(getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + }) + + It("Check the number of Pods", func() { + Eventually(listResourceFunc(ctx, &allPods, allFilters...), time.Second*3, time.Millisecond*500). 
+ Should(Equal(4), fmt.Sprintf("all pods %v", allPods.Items)) + }) + + It("Setting suspend=true in first worker group should not fail", func() { + // suspend the Raycluster worker group + err := updateRayClusterWorkerGroupSuspendField(ctx, rayCluster, true) + Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster") + }) + + It("Worker pods should not be deleted and head pod should still be running", func() { + Consistently(listResourceFunc(ctx, &allPods, workerFilters...), time.Second*5, time.Millisecond*500). + Should(Equal(3), fmt.Sprintf("all pods %v", allPods.Items)) + Consistently(listResourceFunc(ctx, &allPods, headFilters...), time.Second*5, time.Millisecond*500). + Should(Equal(1), fmt.Sprintf("all pods %v", allPods.Items)) + }) + + It("Delete the cluster", func() { + err := k8sClient.Delete(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred()) + }) + }) + + Describe("RayCluster with a multi-host worker group", Ordered, func() { ctx := context.Background() namespace := "default" rayCluster := rayClusterTemplate("raycluster-multihost", namespace) numOfHosts := int32(4) rayCluster.Spec.WorkerGroupSpecs[0].NumOfHosts = numOfHosts - rayCluster.Spec.EnableInTreeAutoscaling = pointer.Bool(true) + rayCluster.Spec.EnableInTreeAutoscaling = ptr.To(true) workerPods := corev1.PodList{} workerFilters := common.RayClusterGroupPodsAssociationOptions(rayCluster, rayCluster.Spec.WorkerGroupSpecs[0].GroupName).ToListOptions() @@ -486,10 +925,10 @@ var _ = Context("Inside the default namespace", func() { // (1) Ray Autoscaler is enabled. // (2) There is only one worker group, and its `replicas` is set to 3, and `workersToDelete` is empty. // (3) The worker group is a multi-host TPU PodSlice consisting of 4 hosts. 
- Expect(*rayCluster.Spec.EnableInTreeAutoscaling).To(Equal(true)) - Expect(len(rayCluster.Spec.WorkerGroupSpecs)).To(Equal(1)) + Expect(*rayCluster.Spec.EnableInTreeAutoscaling).To(BeTrue()) + Expect(rayCluster.Spec.WorkerGroupSpecs).To(HaveLen(1)) Expect(rayCluster.Spec.WorkerGroupSpecs[0].NumOfHosts).To(Equal(numOfHosts)) - Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(pointer.Int32(3))) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(ptr.To[int32](3))) Expect(rayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete).To(BeEmpty()) }) @@ -513,7 +952,7 @@ var _ = Context("Inside the default namespace", func() { Eventually( getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), time.Second*3, time.Millisecond*500).Should(BeNil()) - rayCluster.Spec.WorkerGroupSpecs[0].Replicas = pointer.Int32(2) + rayCluster.Spec.WorkerGroupSpecs[0].Replicas = ptr.To[int32](2) rayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{ workerPods.Items[0].Name, workerPods.Items[1].Name, workerPods.Items[2].Name, workerPods.Items[3].Name, } @@ -528,7 +967,7 @@ var _ = Context("Inside the default namespace", func() { // Ray Autoscaler should clean up WorkersToDelete after scaling process has finished. // Call cleanUpWorkersToDelete to simulate the behavior of the Ray Autoscaler. 
- cleanUpWorkersToDelete(ctx, rayCluster, 0) + cleanUpWorkersToDelete(ctx, rayCluster) }) It("Simulate Ray Autoscaler scales up", func() { @@ -536,7 +975,7 @@ var _ = Context("Inside the default namespace", func() { Eventually( getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), time.Second*3, time.Millisecond*500).Should(BeNil()) - rayCluster.Spec.WorkerGroupSpecs[0].Replicas = pointer.Int32(4) + rayCluster.Spec.WorkerGroupSpecs[0].Replicas = ptr.To[int32](4) return k8sClient.Update(ctx, rayCluster) }) Expect(err).NotTo(HaveOccurred(), "Failed to update RayCluster custom resource") @@ -554,7 +993,7 @@ var _ = Context("Inside the default namespace", func() { time.Second*3, time.Millisecond*500).Should(Equal(numWorkerPods), fmt.Sprintf("workerGroup %v", workerPods.Items)) pod := workerPods.Items[0] - err := k8sClient.Delete(ctx, &pod, &client.DeleteOptions{GracePeriodSeconds: pointer.Int64(0)}) + err := k8sClient.Delete(ctx, &pod, &client.DeleteOptions{GracePeriodSeconds: ptr.To[int64](0)}) Expect(err).NotTo(HaveOccurred(), "Failed to delete a Pod") Eventually( listResourceFunc(ctx, &workerPods, workerFilters...), @@ -565,7 +1004,7 @@ var _ = Context("Inside the default namespace", func() { }) }) - Describe("RayCluster with PodTemplate referencing a different namespace", func() { + Describe("RayCluster with PodTemplate referencing a different namespace", Ordered, func() { ctx := context.Background() namespace := "default" rayCluster := rayClusterTemplate("raycluster-podtemplate-namespace", namespace) @@ -595,7 +1034,308 @@ var _ = Context("Inside the default namespace", func() { // In suite_test.go, we set `RayClusterReconcilerOptions.HeadSidecarContainers` to include a FluentBit sidecar. err := k8sClient.List(ctx, &headPods, headFilters...) 
Expect(err).NotTo(HaveOccurred(), "Failed to list head Pods") - Expect(len(headPods.Items)).Should(Equal(1), "headPods: %v", headPods.Items) + Expect(headPods.Items).Should(HaveLen(1), "headPods: %v", headPods.Items) + }) + }) + + Describe("RayCluster without resource request", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayCluster := rayClusterTemplate("no-resource-req", namespace) + rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Resources.Limits = corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + } + rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Limits = corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + } + headPods := corev1.PodList{} + workerPods := corev1.PodList{} + workerFilters := common.RayClusterGroupPodsAssociationOptions(rayCluster, rayCluster.Spec.WorkerGroupSpecs[0].GroupName).ToListOptions() + headFilters := common.RayClusterHeadPodsAssociationOptions(rayCluster).ToListOptions() + + It("Verify RayCluster spec", func() { + // These tests are designed based on the following assumptions: + // (1) Both head and worker Pods do not have resource requests, but they have resource limits. + // (2) There is only one worker group, and its `replicas` is set to 3.
+ Expect(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Resources.Requests).To(BeNil()) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests).To(BeNil()) + Expect(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Resources.Limits).NotTo(BeNil()) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Limits).NotTo(BeNil()) + Expect(rayCluster.Spec.WorkerGroupSpecs).To(HaveLen(1)) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(ptr.To[int32](3))) + }) + + It("Create a RayCluster custom resource", func() { + err := k8sClient.Create(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + }) + + It("Check the number of worker Pods", func() { + numWorkerPods := 3 + Eventually( + listResourceFunc(ctx, &workerPods, workerFilters...), + time.Second*3, time.Millisecond*500).Should(Equal(numWorkerPods), fmt.Sprintf("workerGroup %v", workerPods.Items)) + }) + + It("Create a head Pod", func() { + err := k8sClient.List(ctx, &headPods, headFilters...) 
+ Expect(err).NotTo(HaveOccurred(), "Failed to list head Pods") + Expect(headPods.Items).Should(HaveLen(1), "headPods: %v", headPods.Items) + }) + + It("Update all Pods to Running", func() { + for _, headPod := range headPods.Items { + headPod.Status.Phase = corev1.PodRunning + Expect(k8sClient.Status().Update(ctx, &headPod)).Should(Succeed()) + } + + Eventually( + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(headPods, headFilters).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(BeTrue(), "Head Pod should be running.") + + for _, workerPod := range workerPods.Items { + workerPod.Status.Phase = corev1.PodRunning + Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed()) + } + + Eventually( + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(workerPods, workerFilters).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(BeTrue(), "All worker Pods should be running.") + }) + + It("RayCluster's .status.state should be updated to 'ready' shortly after all Pods are Running", func() { + Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("Check DesiredMemory and DesiredCPU", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + desiredMemory := resource.MustParse("4Gi") + desiredCPU := resource.MustParse("4") + Expect(rayCluster.Status.DesiredMemory).To(Equal(desiredMemory)) + Expect(rayCluster.Status.DesiredCPU).To(Equal(desiredCPU)) + }) + }) + + Describe("RayCluster with invalid NumOfHosts", Ordered, func() { + // Some users only upgrade the KubeRay image without upgrading the CRD. 
For example, when a + // user upgrades the KubeRay operator from v1.0.0 to v1.1.0 without upgrading the CRD, the + // KubeRay operator will use the zero value of `NumOfHosts` in the CRD. Hence, all worker + // Pods will be deleted. This test case is designed to prevent Pods from being deleted. + ctx := context.Background() + namespace := "default" + rayCluster := rayClusterTemplate("raycluster-invalid-numofhosts", namespace) + numOfHosts := int32(0) + rayCluster.Spec.WorkerGroupSpecs[0].NumOfHosts = numOfHosts + workerPods := corev1.PodList{} + workerFilters := common.RayClusterGroupPodsAssociationOptions(rayCluster, rayCluster.Spec.WorkerGroupSpecs[0].GroupName).ToListOptions() + + It("Verify RayCluster spec", func() { + // These tests are designed based on the following assumptions: + // (1) There is only one worker group, and its `replicas` is set to 3, and `workersToDelete` is empty. + // (2) The worker group has an invalid `numOfHosts` value of 0. + Expect(rayCluster.Spec.WorkerGroupSpecs).To(HaveLen(1)) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].NumOfHosts).To(Equal(numOfHosts)) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(ptr.To[int32](3))) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete).To(BeEmpty()) + }) + + It("Create a RayCluster custom resource", func() { + err := k8sClient.Create(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + }) + + It("Check the number of worker Pods", func() { + numWorkerPods := 3 * int(numOfHosts) + Eventually( + listResourceFunc(ctx, &workerPods, workerFilters...), + time.Second*3, time.Millisecond*500).Should(Equal(numWorkerPods), fmt.Sprintf("workerGroup %v", workerPods.Items)) + }) + }) + + Describe("RayCluster with
RayClusterStatusConditions feature gate enabled", func() { + BeforeEach(func() { + Expect(features.Enabled(features.RayClusterStatusConditions)).To(BeTrue()) + }) + + It("Should handle HeadPodReady and RayClusterProvisioned conditions correctly", func(ctx SpecContext) { + namespace := "default" + rayCluster := rayClusterTemplate("raycluster-status-conditions-enabled", namespace) + rayCluster.Spec.WorkerGroupSpecs[0].Replicas = ptr.To[int32](1) + rayCluster.Spec.WorkerGroupSpecs[0].MaxReplicas = ptr.To[int32](1) + var headPod corev1.Pod + var workerPod corev1.Pod + headPods := corev1.PodList{} + workerPods := corev1.PodList{} + workerFilters := common.RayClusterGroupPodsAssociationOptions(rayCluster, rayCluster.Spec.WorkerGroupSpecs[0].GroupName).ToListOptions() + headFilters := common.RayClusterHeadPodsAssociationOptions(rayCluster).ToListOptions() + + By("Verify RayCluster spec") + Expect(rayCluster.Spec.EnableInTreeAutoscaling).To(BeNil()) + Expect(rayCluster.Spec.WorkerGroupSpecs).To(HaveLen(1)) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(ptr.To[int32](1))) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].MaxReplicas).To(Equal(ptr.To[int32](1))) + Expect(rayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete).To(BeEmpty()) + + By("Create a RayCluster custom resource") + err := k8sClient.Create(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + + By("Check the number of worker Pods") + Eventually( + listResourceFunc(ctx, &workerPods, workerFilters...), + time.Second*3, time.Millisecond*500).Should(Equal(1), fmt.Sprintf("workerGroup %v", workerPods.Items)) + workerPod = workerPods.Items[0] + + By("Get the head pod") + err = k8sClient.List(ctx, &headPods, headFilters...) 
+ Expect(err).NotTo(HaveOccurred(), "Failed to list head Pods") + Expect(headPods.Items).Should(HaveLen(1), "headPods: %v", headPods.Items) + headPod = headPods.Items[0] + + By("Check RayCluster conditions empty initially") + // Initially, neither head Pod nor worker Pod are ready. The RayClusterProvisioned condition should not be present. + Expect(testRayCluster.Status.Conditions).To(BeEmpty()) + + By("Update the head pod to Running and Ready") + headPod.Status.Phase = corev1.PodRunning + headPod.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + } + Expect(k8sClient.Status().Update(ctx, &headPod)).Should(Succeed()) + + By("Check RayCluster HeadPodReady condition is true") + // The head pod is ready, so HeadPodReady condition should be added and set to True. + Eventually( + func() bool { + if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil { + return false + } + return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.HeadPodReady), metav1.ConditionTrue) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) + + By("Check RayCluster RayClusterProvisioned condition is false") + // But the worker pod is not ready yet, RayClusterProvisioned condition should be false. 
+ Consistently( + func() bool { + if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil { + return false + } + return meta.IsStatusConditionFalse(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) + + By("Update the worker pod to Running") + workerPod.Status.Phase = corev1.PodRunning + workerPod.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + } + Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed()) + + By("Check RayCluster RayClusterProvisioned condition is true") + // All Ray Pods are ready for the first time, RayClusterProvisioned condition should be added and set to True. + Eventually( + func() bool { + if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil { + return false + } + return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned), metav1.ConditionTrue) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) + + By("Update the worker pod to NotReady") + workerPod.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionFalse, + }, + } + Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed()) + + By("Check RayCluster RayClusterProvisioned condition is true") + // The worker pod fails readiness, but since RayClusterProvisioned focuses solely on whether all Ray Pods are ready for the first time, + // RayClusterProvisioned condition should still be True. 
+ Consistently( + func() bool { + if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil { + return false + } + return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned), metav1.ConditionTrue) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) + + By("Update the head pod to NotReady") + headPod.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionFalse, + }, + } + Expect(k8sClient.Status().Update(ctx, &headPod)).Should(Succeed()) + + By("Check RayCluster HeadPodReady condition is false") + // The head pod is not ready, so HeadPodReady should be false. + Eventually( + func() bool { + if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil { + return false + } + return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.HeadPodReady), metav1.ConditionFalse) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) + + By("Check RayCluster RayClusterProvisioned condition is still true") + // The head pod also fails readiness, RayClusterProvisioned condition not changed. + Eventually( + func() bool { + if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil { + return false + } + return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned), metav1.ConditionTrue) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) + }) + + It("Should handle RayClusterReplicaFailure condition correctly", func(ctx SpecContext) { + namespace := "default" + rayCluster := rayClusterTemplate("raycluster-status-conditions-enabled-invalid", namespace) + rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].ImagePullPolicy = "!invalid!" 
+ + By("Create an invalid RayCluster custom resource") + err := k8sClient.Create(ctx, rayCluster) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayCluster: %v", rayCluster.Name) + + By("Check RayCluster RayClusterReplicaFailure condition is true") + Eventually( + func() bool { + if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil { + return false + } + return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterReplicaFailure), metav1.ConditionTrue) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) }) }) }) diff --git a/ray-operator/controllers/ray/raycluster_controller_fake_test.go b/ray-operator/controllers/ray/raycluster_controller_unit_test.go similarity index 62% rename from ray-operator/controllers/ray/raycluster_controller_fake_test.go rename to ray-operator/controllers/ray/raycluster_controller_unit_test.go index 5aef753ea5c..69a406447b1 100644 --- a/ray-operator/controllers/ray/raycluster_controller_fake_test.go +++ b/ray-operator/controllers/ray/raycluster_controller_unit_test.go @@ -17,14 +17,21 @@ package ray import ( "context" + "errors" + "fmt" "os" + "strconv" + "strings" "testing" "time" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/expectations" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme" + "github.com/ray-project/kuberay/ray-operator/pkg/features" + "github.com/ray-project/kuberay/ray-operator/test/support" .
"github.com/onsi/ginkgo/v2" "github.com/stretchr/testify/assert" @@ -33,6 +40,7 @@ import ( corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -41,11 +49,11 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/record" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -65,11 +73,17 @@ var ( testRayCluster *rayv1.RayCluster headSelector labels.Selector headNodeIP string + headNodeName string testServices []runtime.Object workerSelector labels.Selector workersToDelete []string ) +const ( + // MultiKueueController represents the value of the MultiKueue controller + MultiKueueController = "kueue.x-k8s.io/multikueue" +) + func setupTest(t *testing.T) { logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) namespaceStr = "default" @@ -81,10 +95,11 @@ func setupTest(t *testing.T) { expectNumOfHostNum = 1 workersToDelete = []string{"pod1", "pod2"} headNodeIP = "1.2.3.4" + headNodeName = "headNode" testPods = []runtime.Object{ &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "headNode", + Name: headNodeName, Namespace: namespaceStr, Labels: map[string]string{ utils.RayNodeLabelKey: "yes", @@ -218,7 +233,7 @@ func setupTest(t *testing.T) { Containers: []corev1.Container{ { Name: "ray-worker", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), Command: []string{"echo"}, Args: []string{"Hello Ray"}, }, @@ -248,7 +263,7 @@ func
setupTest(t *testing.T) { Containers: []corev1.Container{ { Name: "ray-worker", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), Command: []string{"echo"}, Args: []string{"Hello Ray"}, }, @@ -268,7 +283,7 @@ func setupTest(t *testing.T) { testPodsNoHeadIP = []runtime.Object{ &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "headNode", + Name: headNodeName, Namespace: namespaceStr, Labels: map[string]string{ utils.RayNodeLabelKey: "yes", @@ -303,7 +318,7 @@ func setupTest(t *testing.T) { Containers: []corev1.Container{ { Name: "ray-head", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), Command: []string{"python"}, Args: []string{"/opt/code.py"}, Env: []corev1.EnvVar{ @@ -323,9 +338,9 @@ func setupTest(t *testing.T) { }, WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ { - Replicas: pointer.Int32(expectReplicaNum), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), + Replicas: ptr.To[int32](expectReplicaNum), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), NumOfHosts: expectNumOfHostNum, GroupName: groupNameStr, RayStartParams: map[string]string{ @@ -337,7 +352,7 @@ func setupTest(t *testing.T) { Containers: []corev1.Container{ { Name: "ray-worker", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), Command: []string{"echo"}, Args: []string{"Hello Ray"}, Env: []corev1.EnvVar{ @@ -486,11 +501,11 @@ func TestReconcile_RemoveWorkersToDelete_RandomDelete(t *testing.T) { // Simulate the Ray Autoscaler attempting to scale down. 
assert.Equal(t, expectedNumWorkersToDelete, len(testRayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete)) - testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) @@ -581,9 +596,10 @@ func TestReconcile_RemoveWorkersToDelete_NoRandomDelete(t *testing.T) { assert.Equal(t, expectedNumWorkersToDelete, len(testRayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete)-tc.numNonExistPods) testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) @@ -627,11 +643,11 @@ func TestReconcile_RandomDelete_OK(t *testing.T) { assert.Nil(t, err, "Fail to get pod list") assert.Equal(t, len(testPods), len(podList.Items), "Init pod list len is wrong") - testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) @@ -692,9 +708,10 @@ func TestReconcile_PodDeleted_Diff0_OK(t *testing.T) { // Initialize a new RayClusterReconciler. 
testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } // Since the desired state of the workerGroup is 3 replicas, @@ -749,9 +766,10 @@ func TestReconcile_PodDeleted_DiffLess0_OK(t *testing.T) { // Initialize a new RayClusterReconciler. testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } // Since the desired state of the workerGroup is 3 replicas, the controller @@ -804,9 +822,10 @@ func TestReconcile_Diff0_WorkersToDelete_OK(t *testing.T) { // Initialize a new RayClusterReconciler. testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } // Pod3 and Pod4 should be deleted because of the workersToDelete. @@ -851,14 +870,14 @@ func TestReconcile_PodCrash_DiffLess0_OK(t *testing.T) { oldNumWorkerPods := len(testPods) - numHeadPods tests := map[string]struct { - ENABLE_RANDOM_POD_DELETE bool + enableRandomPodDelete bool }{ - // When Autoscaler is enabled, the random Pod deletion is controleld by the feature flag `ENABLE_RANDOM_POD_DELETE`. + // When Autoscaler is enabled, the random Pod deletion is controleld by the feature flag `enableRandomPodDelete`. 
"Enable random Pod deletion": { - ENABLE_RANDOM_POD_DELETE: true, + enableRandomPodDelete: true, }, "Disable random Pod deletion": { - ENABLE_RANDOM_POD_DELETE: false, + enableRandomPodDelete: false, }, } @@ -876,21 +895,22 @@ func TestReconcile_PodCrash_DiffLess0_OK(t *testing.T) { // Initialize a new RayClusterReconciler. testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } - if tc.ENABLE_RANDOM_POD_DELETE { + if tc.enableRandomPodDelete { os.Setenv(utils.ENABLE_RANDOM_POD_DELETE, "true") } else { os.Setenv(utils.ENABLE_RANDOM_POD_DELETE, "false") } cluster := testRayCluster.DeepCopy() - // Case 1: ENABLE_RANDOM_POD_DELETE is true. + // Case 1: enableRandomPodDelete is true. // Since the desired state of the workerGroup is 3 replicas, the controller will delete a worker Pod randomly. // After the deletion, the number of worker Pods should be 3. - // Case 2: ENABLE_RANDOM_POD_DELETE is false. + // Case 2: enableRandomPodDelete is false. // Only the Pod in the `workersToDelete` will be deleted. After the deletion, the number of worker Pods should be 4. err = testRayClusterReconciler.reconcilePods(ctx, cluster) assert.Nil(t, err, "Fail to reconcile Pods") @@ -901,13 +921,13 @@ func TestReconcile_PodCrash_DiffLess0_OK(t *testing.T) { }) assert.Nil(t, err, "Fail to get pod list after reconcile") - if tc.ENABLE_RANDOM_POD_DELETE { - // Case 1: ENABLE_RANDOM_POD_DELETE is true. + if tc.enableRandomPodDelete { + // Case 1: enableRandomPodDelete is true. 
assert.Equal(t, expectedNumWorkerPods, len(podList.Items)) assert.Equal(t, expectedNumWorkerPods, getNotFailedPodItemNum(podList), "Replica number is wrong after reconcile expect %d actual %d", expectReplicaNum, getNotFailedPodItemNum(podList)) } else { - // Case 2: ENABLE_RANDOM_POD_DELETE is false. + // Case 2: enableRandomPodDelete is false. assert.Equal(t, expectedNumWorkerPods+1, len(podList.Items)) assert.Equal(t, expectedNumWorkerPods+1, getNotFailedPodItemNum(podList), "Replica number is wrong after reconcile expect %d actual %d", expectReplicaNum, getNotFailedPodItemNum(podList)) @@ -919,44 +939,61 @@ func TestReconcile_PodCrash_DiffLess0_OK(t *testing.T) { func TestReconcile_PodEvicted_DiffLess0_OK(t *testing.T) { setupTest(t) - fakeClient := clientFake.NewClientBuilder(). - WithRuntimeObjects(testPods...). - Build() - ctx := context.Background() + tests := map[string]struct { + restartPolicy corev1.RestartPolicy + }{ + "Pod with RestartPolicyAlways": { + restartPolicy: corev1.RestartPolicyAlways, + }, + "Pod with RestartPolicyOnFailure": { + restartPolicy: corev1.RestartPolicyOnFailure, + }, + } - podList := corev1.PodList{} - err := fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + fakeClient := clientFake.NewClientBuilder(). + WithRuntimeObjects(testPods...). + Build() + ctx := context.Background() + podList := corev1.PodList{} + err := fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) - assert.Nil(t, err, "Fail to get pod list") - assert.Equal(t, len(testPods), len(podList.Items), "Init pod list len is wrong") + assert.Nil(t, err, "Fail to get pod list") + assert.Equal(t, len(testPods), len(podList.Items), "Init pod list len is wrong") - // Simulate head pod get evicted. 
- podList.Items[0].Status.Phase = corev1.PodFailed - podList.Items[0].Status.Reason = "Evicted" - err = fakeClient.Status().Update(ctx, &podList.Items[0]) - assert.Nil(t, err, "Fail to update head Pod status") + // Simulate head pod get evicted. + podList.Items[0].Spec.RestartPolicy = tc.restartPolicy + err = fakeClient.Update(ctx, &podList.Items[0]) + assert.Nil(t, err, "Fail to update head Pod restart policy") + podList.Items[0].Status.Phase = corev1.PodFailed + err = fakeClient.Status().Update(ctx, &podList.Items[0]) + assert.Nil(t, err, "Fail to update head Pod status") - testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, - } + testRayClusterReconciler := &RayClusterReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), + } - err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) - // The head Pod with the status `Failed` will be deleted, and the function will return an - // error to requeue the request with a short delay. If the function returns nil, the controller - // will requeue the request after RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV (default: 300) seconds. - assert.NotNil(t, err) + err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) + // The head Pod with the status `Failed` will be deleted, and the function will return an + // error to requeue the request with a short delay. If the function returns nil, the controller + // will requeue the request after RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV (default: 300) seconds. 
+ assert.NotNil(t, err) - // Filter head pod - err = fakeClient.List(ctx, &podList, &client.ListOptions{ - LabelSelector: headSelector, - Namespace: namespaceStr, - }) + // Filter head pod + err = fakeClient.List(ctx, &podList, &client.ListOptions{ + LabelSelector: headSelector, + Namespace: namespaceStr, + }) - assert.Nil(t, err, "Fail to get pod list after reconcile") - assert.Equal(t, 0, len(podList.Items), - "Evicted head should be deleted after reconcile expect %d actual %d", 0, len(podList.Items)) + assert.Nil(t, err, "Fail to get pod list after reconcile") + assert.Equal(t, 0, len(podList.Items), + "Evicted head should be deleted after reconcile expect %d actual %d", 0, len(podList.Items)) + }) + } } func TestReconcileHeadService(t *testing.T) { @@ -976,6 +1013,7 @@ func TestReconcileHeadService(t *testing.T) { Labels: map[string]string{ utils.RayClusterLabelKey: cluster.Name, utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + utils.RayIDLabelKey: utils.CheckLabel(utils.GenerateIdentifier(cluster.Name, rayv1.HeadNode)), }, }, } @@ -989,13 +1027,15 @@ func TestReconcileHeadService(t *testing.T) { headServiceSelector := labels.SelectorFromSet(map[string]string{ utils.RayClusterLabelKey: cluster.Name, utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + utils.RayIDLabelKey: utils.CheckLabel(utils.GenerateIdentifier(cluster.Name, rayv1.HeadNode)), }) // Initialize RayCluster reconciler. r := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } // Case 1: Head service does not exist. @@ -1062,9 +1102,10 @@ func TestReconcileHeadlessService(t *testing.T) { // Initialize RayCluster reconciler. 
r := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } headlessServiceSelector := labels.SelectorFromSet(map[string]string{ @@ -1137,9 +1178,10 @@ func TestReconcile_AutoscalerServiceAccount(t *testing.T) { assert.True(t, k8serrors.IsNotFound(err), "Head group service account should not exist yet") testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } err = testRayClusterReconciler.reconcileAutoscalerServiceAccount(ctx, testRayCluster) @@ -1170,9 +1212,10 @@ func TestReconcile_Autoscaler_ServiceAccountName(t *testing.T) { // Initialize the reconciler testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } // If users specify ServiceAccountName for the head Pod, they need to create a ServiceAccount themselves. 
@@ -1215,9 +1258,10 @@ func TestReconcile_AutoscalerRoleBinding(t *testing.T) { assert.True(t, k8serrors.IsNotFound(err), "autoscaler RoleBinding should not exist yet") testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } err = testRayClusterReconciler.reconcileAutoscalerRoleBinding(ctx, testRayCluster) @@ -1251,14 +1295,18 @@ func TestReconcile_UpdateClusterReason(t *testing.T) { assert.Empty(t, cluster.Status.Reason, "Cluster reason should be empty") testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } reason := "test reason" - err = testRayClusterReconciler.updateClusterReason(ctx, testRayCluster, reason) + newTestRayCluster := testRayCluster.DeepCopy() + newTestRayCluster.Status.Reason = reason + inconsistent, err := testRayClusterReconciler.updateRayClusterStatus(ctx, testRayCluster, newTestRayCluster) assert.Nil(t, err, "Fail to update cluster reason") + assert.True(t, inconsistent) err = fakeClient.Get(ctx, namespacedName, &cluster) assert.Nil(t, err, "Fail to get RayCluster after updating reason") @@ -1281,16 +1329,16 @@ func TestUpdateEndpoints(t *testing.T) { } expected := map[string]string{ - "client": "10001", - "dashboard": "8265", - "metrics": "8080", - "redis": "6379", - "serve": "8000", + "client": "10001", + "dashboard": "8265", + "metrics": "8080", + "gcs-server": "6379", + "serve": "8000", } assert.Equal(t, expected, testRayCluster.Status.Endpoints, "RayCluster status endpoints not updated") } -func TestGetHeadPodIP(t *testing.T) { +func 
TestGetHeadPodIPAndNameFromGetRayClusterHeadPod(t *testing.T) { setupTest(t) extraHeadPod := &corev1.Pod{ @@ -1306,13 +1354,15 @@ func TestGetHeadPodIP(t *testing.T) { } tests := map[string]struct { - pods []runtime.Object expectedIP string + expectedName string + pods []runtime.Object returnsError bool }{ "get expected Pod IP if there's one head node": { pods: testPods, expectedIP: headNodeIP, + expectedName: headNodeName, returnsError: false, }, "no error if there's no head node": { @@ -1323,11 +1373,12 @@ func TestGetHeadPodIP(t *testing.T) { "no error if there's more than one head node": { pods: append(testPods, extraHeadPod), expectedIP: "", - returnsError: false, + returnsError: true, }, "no error if head pod ip is not yet set": { pods: testPodsNoHeadIP, expectedIP: "", + expectedName: headNodeName, returnsError: false, }, } @@ -1336,26 +1387,26 @@ func TestGetHeadPodIP(t *testing.T) { t.Run(name, func(t *testing.T) { fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects(tc.pods...).Build() - testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + ip, name := "", "" + headPod, err := common.GetRayClusterHeadPod(context.TODO(), fakeClient, testRayCluster) + if headPod != nil { + ip = headPod.Status.PodIP + name = headPod.Name } - ip, err := testRayClusterReconciler.getHeadPodIP(context.TODO(), testRayCluster) - if tc.returnsError { - assert.NotNil(t, err, "getHeadPodIP should return error") + assert.NotNil(t, err, "GetRayClusterHeadPod should return error") } else { - assert.Nil(t, err, "getHeadPodIP should not return error") + assert.Nil(t, err, "GetRayClusterHeadPod should not return error") } - assert.Equal(t, tc.expectedIP, ip, "getHeadPodIP returned unexpected IP") + assert.Equal(t, tc.expectedIP, ip, "GetRayClusterHeadPod returned unexpected IP") + assert.Equal(t, tc.expectedName, name, "GetRayClusterHeadPod returned unexpected name") }) } } -func 
TestGetHeadServiceIP(t *testing.T) { +func TestGetHeadServiceIPAndName(t *testing.T) { setupTest(t) headServiceIP := "1.2.3.4" @@ -1377,23 +1428,27 @@ func TestGetHeadServiceIP(t *testing.T) { } tests := map[string]struct { - services []runtime.Object expectedIP string + expectedName string + services []runtime.Object returnsError bool }{ "get expected Service IP if there's one head Service": { services: testServices, expectedIP: headServiceIP, + expectedName: headService.Name, returnsError: false, }, "get error if there's no head Service": { services: []runtime.Object{}, expectedIP: "", + expectedName: "", returnsError: true, }, "get error if there's more than one head Service": { services: append(testServices, extraHeadService), expectedIP: "", + expectedName: "", returnsError: true, }, } @@ -1401,26 +1456,50 @@ func TestGetHeadServiceIP(t *testing.T) { for name, tc := range tests { t.Run(name, func(t *testing.T) { fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects(tc.services...).Build() - testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } - ip, err := testRayClusterReconciler.getHeadServiceIP(context.TODO(), testRayCluster) - + ip, name, err := testRayClusterReconciler.getHeadServiceIPAndName(context.TODO(), testRayCluster) if tc.returnsError { - assert.NotNil(t, err, "getHeadServiceIP should return error") + assert.NotNil(t, err, "getHeadServiceIPAndName should return error") } else { - assert.Nil(t, err, "getHeadServiceIP should not return error") + assert.Nil(t, err, "getHeadServiceIPAndName should not return error") } - assert.Equal(t, tc.expectedIP, ip, "getHeadServiceIP returned unexpected IP") + assert.Equal(t, tc.expectedIP, ip, "getHeadServiceIPAndName returned unexpected IP") + 
assert.Equal(t, tc.expectedName, name, "getHeadServiceIPAndName returned unexpected name") }) } } +func TestGetHeadServiceIPAndNameOnHeadlessService(t *testing.T) { + setupTest(t) + + headService, err := common.BuildServiceForHeadPod(context.Background(), *testRayCluster, nil, nil) + if err != nil { + t.Errorf("failed to build head service: %v", err) + } + assert.Equal(t, headService.Spec.ClusterIP, corev1.ClusterIPNone, "BuildServiceForHeadPod returned unexpected ClusterIP") + + fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects(headService).WithRuntimeObjects(testPods...).Build() + + testRayClusterReconciler := &RayClusterReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + } + + ip, name, err := testRayClusterReconciler.getHeadServiceIPAndName(context.TODO(), testRayCluster) + + assert.Nil(t, err) + assert.Equal(t, headNodeIP, ip, "getHeadServiceIPAndName returned unexpected IP") + assert.Equal(t, headService.Name, name, "getHeadServiceIPAndName returned unexpected name") +} + func TestUpdateStatusObservedGeneration(t *testing.T) { setupTest(t) @@ -1462,13 +1541,14 @@ func TestUpdateStatusObservedGeneration(t *testing.T) { // Initialize RayCluster reconciler. testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } // Compare the values of `Generation` and `ObservedGeneration` to check if they match. 
- newInstance, err := testRayClusterReconciler.calculateStatus(ctx, testRayCluster) + newInstance, err := testRayClusterReconciler.calculateStatus(ctx, testRayCluster, nil) assert.Nil(t, err) err = fakeClient.Get(ctx, namespacedName, &cluster) assert.Nil(t, err) @@ -1495,21 +1575,25 @@ func TestReconcile_UpdateClusterState(t *testing.T) { cluster := rayv1.RayCluster{} err := fakeClient.Get(ctx, namespacedName, &cluster) assert.Nil(t, err, "Fail to get RayCluster") - assert.Empty(t, cluster.Status.State, "Cluster state should be empty") + assert.Empty(t, cluster.Status.State, "Cluster state should be empty") //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: newScheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: newScheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } state := rayv1.Ready - err = testRayClusterReconciler.updateClusterState(ctx, testRayCluster, state) + newTestRayCluster := testRayCluster.DeepCopy() + newTestRayCluster.Status.State = state //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + inconsistent, err := testRayClusterReconciler.updateRayClusterStatus(ctx, testRayCluster, newTestRayCluster) assert.Nil(t, err, "Fail to update cluster state") + assert.True(t, inconsistent) err = fakeClient.Get(ctx, namespacedName, &cluster) assert.Nil(t, err, "Fail to get RayCluster after updating state") - assert.Equal(t, cluster.Status.State, state, "Cluster state should be updated") + assert.Equal(t, cluster.Status.State, state, "Cluster state should be updated") //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } func TestInconsistentRayClusterStatus(t *testing.T) { @@ -1526,6 +1610,7 @@ func TestInconsistentRayClusterStatus(t *testing.T) { timeNow := metav1.Now() oldStatus := rayv1.RayClusterStatus{ State: 
rayv1.Ready, + ReadyWorkerReplicas: 1, AvailableWorkerReplicas: 1, DesiredWorkerReplicas: 1, MinWorkerReplicas: 1, @@ -1552,7 +1637,7 @@ func TestInconsistentRayClusterStatus(t *testing.T) { // Case 1: `State` is different => return true newStatus := oldStatus.DeepCopy() - newStatus.State = rayv1.Failed + newStatus.State = rayv1.Suspended //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) // Case 2: `Reason` is different => return true @@ -1560,49 +1645,63 @@ func TestInconsistentRayClusterStatus(t *testing.T) { newStatus.Reason = "new reason" assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) - // Case 3: `AvailableWorkerReplicas` is different => return true + // Case 3: `ReadyWorkerReplicas` is different => return true + newStatus = oldStatus.DeepCopy() + newStatus.ReadyWorkerReplicas = oldStatus.ReadyWorkerReplicas + 1 + assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) + + // Case 4: `AvailableWorkerReplicas` is different => return true newStatus = oldStatus.DeepCopy() newStatus.AvailableWorkerReplicas = oldStatus.AvailableWorkerReplicas + 1 assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) - // Case 4: `DesiredWorkerReplicas` is different => return true + // Case 5: `DesiredWorkerReplicas` is different => return true newStatus = oldStatus.DeepCopy() newStatus.DesiredWorkerReplicas = oldStatus.DesiredWorkerReplicas + 1 assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) - // Case 5: `MinWorkerReplicas` is different => return true + // Case 6: `MinWorkerReplicas` is different => return true newStatus = oldStatus.DeepCopy() newStatus.MinWorkerReplicas = oldStatus.MinWorkerReplicas + 1 assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) - // Case 6: `MaxWorkerReplicas` is different => return true + // Case 7: `MaxWorkerReplicas` is different => return 
true newStatus = oldStatus.DeepCopy() newStatus.MaxWorkerReplicas = oldStatus.MaxWorkerReplicas + 1 assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) - // Case 7: `Endpoints` is different => return true + // Case 8: `Endpoints` is different => return true newStatus = oldStatus.DeepCopy() newStatus.Endpoints["fakeEndpoint"] = "10009" assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) - // Case 8: `Head` is different => return true + // Case 9: `Head` is different => return true newStatus = oldStatus.DeepCopy() newStatus.Head.PodIP = "test head pod ip" assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) - // Case 9: `LastUpdateTime` is different => return false + // Case 10: `LastUpdateTime` is different => return false newStatus = oldStatus.DeepCopy() newStatus.LastUpdateTime = &metav1.Time{Time: timeNow.Add(time.Hour)} assert.False(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) - // Case 10: `ObservedGeneration` is different => return false + // Case 11: `ObservedGeneration` is different => return false newStatus = oldStatus.DeepCopy() newStatus.ObservedGeneration = oldStatus.ObservedGeneration + 1 assert.False(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) + + // Case 12: `Conditions` is different => return true + newStatus = oldStatus.DeepCopy() + meta.SetStatusCondition(&newStatus.Conditions, metav1.Condition{Type: string(rayv1.RayClusterReplicaFailure), Status: metav1.ConditionTrue}) + assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) } func TestCalculateStatus(t *testing.T) { setupTest(t) + assert.True(t, features.Enabled(features.RayClusterStatusConditions)) + + // disable feature gate for the following tests + restoreFeatureFlag := features.SetFeatureGateDuringTest(t, features.RayClusterStatusConditions, false) // Create a new scheme with CRDs, Pod, Service schemes. 
newScheme := runtime.NewScheme() @@ -1614,20 +1713,43 @@ func TestCalculateStatus(t *testing.T) { headService, err := common.BuildServiceForHeadPod(context.Background(), *testRayCluster, nil, nil) assert.Nil(t, err, "Failed to build head service.") headService.Spec.ClusterIP = headServiceIP + podReadyStatus := corev1.PodStatus{ + PodIP: headNodeIP, + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + } + headLabel := map[string]string{ + utils.RayClusterLabelKey: instanceName, + utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + } + workerLabel := map[string]string{ + utils.RayClusterLabelKey: instanceName, + utils.RayNodeTypeLabelKey: string(rayv1.WorkerNode), + } headPod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "headNode", Namespace: namespaceStr, - Labels: map[string]string{ - utils.RayClusterLabelKey: instanceName, - utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), - }, - }, - Status: corev1.PodStatus{ - PodIP: headNodeIP, + Labels: headLabel, }, + Status: podReadyStatus, } runtimeObjects := []runtime.Object{headPod, headService} + for i := int32(0); i < expectReplicaNum; i++ { + runtimeObjects = append(runtimeObjects, &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "workerNode-" + strconv.Itoa(int(i)), + Namespace: namespaceStr, + Labels: workerLabel, + }, + Status: podReadyStatus, + }) + } // Initialize a fake client with newScheme and runtimeObjects. 
fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() @@ -1641,89 +1763,499 @@ func TestCalculateStatus(t *testing.T) { } // Test head information - newInstance, err := r.calculateStatus(ctx, testRayCluster) + newInstance, err := r.calculateStatus(ctx, testRayCluster, nil) assert.Nil(t, err) assert.Equal(t, headNodeIP, newInstance.Status.Head.PodIP) assert.Equal(t, headServiceIP, newInstance.Status.Head.ServiceIP) -} + assert.Equal(t, headService.Name, newInstance.Status.Head.ServiceName) + assert.NotNil(t, newInstance.Status.StateTransitionTimes, "Cluster state transition timestamp should be created") + assert.Equal(t, newInstance.Status.LastUpdateTime, newInstance.Status.StateTransitionTimes[rayv1.Ready]) -func Test_TerminatedWorkers_NoAutoscaler(t *testing.T) { - setupTest(t) + // Test reconcilePodsErr with the feature gate disabled + newInstance, err = r.calculateStatus(ctx, testRayCluster, errors.Join(utils.ErrFailedCreateHeadPod, errors.New("invalid"))) + assert.Nil(t, err) + assert.Empty(t, newInstance.Status.Conditions) - // TODO (kevin85421): The tests in this file are not independent. As a workaround, - // I added the assertion to prevent the test logic from being affected by other changes. - // However, we should refactor the tests in the future. + // enable feature gate for the following tests + restoreFeatureFlag() - // This test makes some assumptions about the testRayCluster object. - // (1) 1 workerGroup - // (2) The goal state of the workerGroup is 3 replicas. - // (3) Set the `WorkersToDelete` field to an empty slice. - // (4) Disable autoscaling. 
- assert.Equal(t, 1, len(testRayCluster.Spec.WorkerGroupSpecs), "This test assumes only one worker group.") - expectedNumWorkerPods := int(*testRayCluster.Spec.WorkerGroupSpecs[0].Replicas) - assert.Equal(t, 3, expectedNumWorkerPods, "This test assumes the expected number of worker pods is 3.") - testRayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{} - testRayCluster.Spec.EnableInTreeAutoscaling = nil + // Test CheckRayHeadRunningAndReady with head pod running and ready + newInstance, _ = r.calculateStatus(ctx, testRayCluster, nil) + assert.True(t, meta.IsStatusConditionPresentAndEqual(newInstance.Status.Conditions, string(rayv1.HeadPodReady), metav1.ConditionTrue)) - // This test makes some assumptions about the testPods object. - // `testPods` contains 6 pods, including 1 head pod and 5 worker pods. - assert.Equal(t, 6, len(testPods), "This test assumes the testPods object contains 6 pods.") - numHeadPods := 1 - oldNumWorkerPods := len(testPods) - numHeadPods + // Test CheckRayHeadRunningAndReady with head pod not ready + headPod.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionFalse, + }, + } + runtimeObjects = []runtime.Object{headPod, headService} + fakeClient = clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + r.Client = fakeClient + newInstance, _ = r.calculateStatus(ctx, testRayCluster, nil) + assert.True(t, meta.IsStatusConditionPresentAndEqual(newInstance.Status.Conditions, string(rayv1.HeadPodReady), metav1.ConditionFalse)) - // Initialize a fake client with newScheme and runtimeObjects. 
- fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects(testPods...).Build() - ctx := context.Background() + // Test CheckRayHeadRunningAndReady with head pod not running + headPod.Status.Phase = corev1.PodFailed + runtimeObjects = []runtime.Object{headPod, headService} + fakeClient = clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + r.Client = fakeClient + newInstance, _ = r.calculateStatus(ctx, testRayCluster, nil) + assert.True(t, meta.IsStatusConditionPresentAndEqual(newInstance.Status.Conditions, string(rayv1.HeadPodReady), metav1.ConditionFalse)) - // Get the pod list from the fake client. - podList := corev1.PodList{} - err := fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) - assert.Nil(t, err, "Fail to get pod list") - assert.Equal(t, oldNumWorkerPods+numHeadPods, len(podList.Items), "Init pod list len is wrong") + // Test reconcilePodsErr with the feature gate enabled + newInstance, err = r.calculateStatus(ctx, testRayCluster, errors.Join(utils.ErrFailedCreateHeadPod, errors.New("invalid"))) + assert.Nil(t, err) + assert.True(t, meta.IsStatusConditionPresentAndEqual(newInstance.Status.Conditions, string(rayv1.RayClusterReplicaFailure), metav1.ConditionTrue)) +} - // Make sure all worker Pods are running. - for _, pod := range podList.Items { - pod.Status.Phase = corev1.PodRunning - err = fakeClient.Status().Update(ctx, &pod) - assert.Nil(t, err, "Fail to update pod status") +// TestCalculateStatusWithoutDesiredReplicas tests that the cluster CR should not be marked as Ready if +// DesiredWorkerReplicas > 0 and DesiredWorkerReplicas != ReadyWorkerReplicas +func TestCalculateStatusWithoutDesiredReplicas(t *testing.T) { + setupTest(t) + + // Create a new scheme with CRDs, Pod, Service schemes. 
+ newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + + // Mock data + headServiceIP := "aaa.bbb.ccc.ddd" + headService, err := common.BuildServiceForHeadPod(context.Background(), *testRayCluster, nil, nil) + assert.Nil(t, err, "Failed to build head service.") + headService.Spec.ClusterIP = headServiceIP + headPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "headNode", + Namespace: namespaceStr, + Labels: map[string]string{ + utils.RayClusterLabelKey: instanceName, + utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + }, + }, + Status: corev1.PodStatus{ + PodIP: headNodeIP, + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + }, } + runtimeObjects := []runtime.Object{headPod, headService} - // Initialize a new RayClusterReconciler. - testRayClusterReconciler := &RayClusterReconciler{ + // Initialize a fake client with newScheme and runtimeObjects. + fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + ctx := context.Background() + + // Initialize a RayCluster reconciler. + r := &RayClusterReconciler{ Client: fakeClient, Recorder: &record.FakeRecorder{}, Scheme: scheme.Scheme, } - // Since the desired state of the workerGroup is 3 replicas, the controller - // will delete 2 worker Pods. 
- err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) - assert.Nil(t, err, "Fail to reconcile Pods") + newInstance, err := r.calculateStatus(ctx, testRayCluster, nil) + assert.Nil(t, err) + assert.NotEqual(t, newInstance.Status.DesiredWorkerReplicas, 0) + assert.NotEqual(t, newInstance.Status.DesiredWorkerReplicas, newInstance.Status.ReadyWorkerReplicas) + assert.Equal(t, newInstance.Status.State, rayv1.ClusterState("")) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + assert.Equal(t, newInstance.Status.Reason, "") + assert.Nil(t, newInstance.Status.StateTransitionTimes) +} - err = fakeClient.List(ctx, &podList, &client.ListOptions{ - LabelSelector: workerSelector, - Namespace: namespaceStr, - }) - assert.Nil(t, err, "Fail to get Pod list after reconcile") - assert.Equal(t, expectedNumWorkerPods, len(podList.Items)) +// TestCalculateStatusWithSuspendedWorkerGroups tests that the cluster CR should be marked as Ready without workers +// and all desired resources are not counted with suspended workers +func TestCalculateStatusWithSuspendedWorkerGroups(t *testing.T) { + setupTest(t) - // Update 1 worker Pod to Failed (a terminate state) state. - podList.Items[0].Status.Phase = corev1.PodFailed - err = fakeClient.Status().Update(ctx, &podList.Items[0]) - assert.Nil(t, err, "Fail to update Pod status") + testRayCluster.Spec.WorkerGroupSpecs[0].Suspend = ptr.To[bool](true) + testRayCluster.Spec.WorkerGroupSpecs[0].MinReplicas = ptr.To[int32](100) + testRayCluster.Spec.WorkerGroupSpecs[0].MaxReplicas = ptr.To[int32](100) + testRayCluster.Spec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests = corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("100Mi"), + } - // Reconcile again, and the Failed worker Pod should be deleted even if the goal state of the workerGroup specifies 3 replicas. 
- // The function will return an error to requeue the request after a brief delay. Moreover, if there are unhealthy worker - // Pods to be deleted, the controller won't create new worker Pods during the same reconcile loop. As a result, the number of worker - // Pods will be (expectedNumWorkerPods - 1) after the reconcile loop. - err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) - assert.NotNil(t, err) - err = fakeClient.List(ctx, &podList, &client.ListOptions{ - LabelSelector: workerSelector, - Namespace: namespaceStr, - }) - assert.Nil(t, err, "Fail to get Pod list after reconcile") - assert.Equal(t, expectedNumWorkerPods-1, len(podList.Items)) + // Create a new scheme with CRDs, Pod, Service schemes. + newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + + // Mock data + headServiceIP := "aaa.bbb.ccc.ddd" + headService, err := common.BuildServiceForHeadPod(context.Background(), *testRayCluster, nil, nil) + assert.Nil(t, err, "Failed to build head service.") + headService.Spec.ClusterIP = headServiceIP + headPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "headNode", + Namespace: namespaceStr, + Labels: map[string]string{ + utils.RayClusterLabelKey: instanceName, + utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + }, + }, + Status: corev1.PodStatus{ + PodIP: headNodeIP, + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } + runtimeObjects := []runtime.Object{headPod, headService} + + // Initialize a fake client with newScheme and runtimeObjects. + fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + ctx := context.Background() + + // Initialize a RayCluster reconciler. 
+ r := &RayClusterReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + } + + newInstance, err := r.calculateStatus(ctx, testRayCluster, nil) + assert.Nil(t, err) + assert.Equal(t, newInstance.Status.DesiredWorkerReplicas, int32(0)) + assert.Equal(t, newInstance.Status.MinWorkerReplicas, int32(0)) + assert.Equal(t, newInstance.Status.MaxWorkerReplicas, int32(0)) + assert.Equal(t, newInstance.Status.DesiredCPU, resource.Quantity{}) + assert.Equal(t, newInstance.Status.DesiredMemory, resource.Quantity{}) + assert.Equal(t, newInstance.Status.State, rayv1.Ready) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + assert.NotNil(t, newInstance.Status.StateTransitionTimes) +} + +// TestCalculateStatusWithReconcileErrorBackAndForth tests that the cluster CR should not be marked as Ready if reconcileErr != nil +// and the Ready state should not be removed after being Ready even if reconcileErr != nil +func TestCalculateStatusWithReconcileErrorBackAndForth(t *testing.T) { + setupTest(t) + + // Create a new scheme with CRDs, Pod, Service schemes. 
+ newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + + // Mock data + headServiceIP := "aaa.bbb.ccc.ddd" + headService, err := common.BuildServiceForHeadPod(context.Background(), *testRayCluster, nil, nil) + assert.Nil(t, err, "Failed to build head service.") + headService.Spec.ClusterIP = headServiceIP + podReadyStatus := corev1.PodStatus{ + PodIP: headNodeIP, + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + } + headLabel := map[string]string{ + utils.RayClusterLabelKey: instanceName, + utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + } + workerLabel := map[string]string{ + utils.RayClusterLabelKey: instanceName, + utils.RayNodeTypeLabelKey: string(rayv1.WorkerNode), + } + headPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "headNode", + Namespace: namespaceStr, + Labels: headLabel, + }, + Status: podReadyStatus, + } + runtimeObjects := []runtime.Object{headPod, headService} + for i := int32(0); i < expectReplicaNum; i++ { + runtimeObjects = append(runtimeObjects, &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "workerNode-" + strconv.Itoa(int(i)), + Namespace: namespaceStr, + Labels: workerLabel, + }, + Status: podReadyStatus, + }) + } + + // Initialize a fake client with newScheme and runtimeObjects. + fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + ctx := context.Background() + + // Initialize a RayCluster reconciler. 
+ r := &RayClusterReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + } + + // Test head information with a reconcile error + newInstance, err := r.calculateStatus(ctx, testRayCluster, errors.New("invalid")) + assert.Nil(t, err) + assert.NotEqual(t, newInstance.Status.DesiredWorkerReplicas, 0) + // Note that even if there are DesiredWorkerReplicas ready, we don't mark CR to be Ready state due to the reconcile error. + assert.Equal(t, newInstance.Status.DesiredWorkerReplicas, newInstance.Status.ReadyWorkerReplicas) + assert.Equal(t, newInstance.Status.State, rayv1.ClusterState("")) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + assert.Equal(t, newInstance.Status.Reason, "") + assert.Nil(t, newInstance.Status.StateTransitionTimes) + + // Test head information without a reconcile error + newInstance, err = r.calculateStatus(ctx, newInstance, nil) + assert.Nil(t, err) + assert.NotEqual(t, newInstance.Status.DesiredWorkerReplicas, 0) + assert.Equal(t, newInstance.Status.DesiredWorkerReplicas, newInstance.Status.ReadyWorkerReplicas) + assert.Equal(t, newInstance.Status.State, rayv1.Ready) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + assert.Equal(t, newInstance.Status.Reason, "") + assert.NotNil(t, newInstance.Status.StateTransitionTimes) + assert.NotNil(t, newInstance.Status.StateTransitionTimes[rayv1.Ready]) + t1 := newInstance.Status.StateTransitionTimes[rayv1.Ready] + + // Test head information with a reconcile error again + newInstance, err = r.calculateStatus(ctx, newInstance, errors.New("invalid2")) + assert.Nil(t, err) + assert.NotEqual(t, newInstance.Status.DesiredWorkerReplicas, 0) + assert.Equal(t, newInstance.Status.DesiredWorkerReplicas, newInstance.Status.ReadyWorkerReplicas) + assert.Equal(t, newInstance.Status.State, rayv1.Ready) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + assert.Equal(t, newInstance.Status.Reason, "") + 
assert.NotNil(t, newInstance.Status.StateTransitionTimes) + assert.NotNil(t, newInstance.Status.StateTransitionTimes[rayv1.Ready]) + assert.Equal(t, t1, newInstance.Status.StateTransitionTimes[rayv1.Ready]) // no change to StateTransitionTimes +} + +func TestRayClusterProvisionedCondition(t *testing.T) { + setupTest(t) + assert.True(t, features.Enabled(features.RayClusterStatusConditions)) + + newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + + ReadyStatus := corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + } + + UnReadyStatus := corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionFalse, + }, + }, + } + + headPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "headNode", + Namespace: namespaceStr, + Labels: map[string]string{ + utils.RayClusterLabelKey: instanceName, + utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + }, + }, + } + + workerPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "workerNode", + Namespace: namespaceStr, + Labels: map[string]string{ + utils.RayClusterLabelKey: instanceName, + utils.RayNodeTypeLabelKey: string(rayv1.WorkerNode), + utils.RayNodeGroupLabelKey: groupNameStr, + }, + }, + } + + runtimeObjects := append([]runtime.Object{headPod, workerPod}, testServices...) + fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + ctx := context.Background() + r := &RayClusterReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + } + + // Initially, neither head Pod nor worker Pod are ready. The RayClusterProvisioned condition should not be present. 
+ headPod.Status = UnReadyStatus + workerPod.Status = UnReadyStatus + _ = fakeClient.Status().Update(ctx, headPod) + _ = fakeClient.Status().Update(ctx, workerPod) + testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil) + rayClusterProvisionedCondition := meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) + assert.Equal(t, rayClusterProvisionedCondition.Status, metav1.ConditionFalse) + assert.Equal(t, rayClusterProvisionedCondition.Reason, rayv1.RayClusterPodsProvisioning) + + // After a while, all Ray Pods are ready for the first time, RayClusterProvisioned condition should be added and set to True. + headPod.Status = ReadyStatus + workerPod.Status = ReadyStatus + _ = fakeClient.Status().Update(ctx, headPod) + _ = fakeClient.Status().Update(ctx, workerPod) + testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil) + rayClusterProvisionedCondition = meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) + assert.Equal(t, rayClusterProvisionedCondition.Status, metav1.ConditionTrue) + assert.Equal(t, rayClusterProvisionedCondition.Reason, rayv1.AllPodRunningAndReadyFirstTime) + + // After a while, worker Pod fails readiness, but since RayClusterProvisioned focuses solely on whether all Ray Pods are ready for the first time, + // RayClusterProvisioned condition should still be True. + workerPod.Status = UnReadyStatus + _ = fakeClient.Status().Update(ctx, workerPod) + testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil) + rayClusterProvisionedCondition = meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) + assert.Equal(t, rayClusterProvisionedCondition.Status, metav1.ConditionTrue) + assert.Equal(t, rayClusterProvisionedCondition.Reason, rayv1.AllPodRunningAndReadyFirstTime) + + // After a while, head Pod also fails readiness, RayClusterProvisioned condition should still be true. 
+ headPod.Status = UnReadyStatus + _ = fakeClient.Status().Update(ctx, headPod) + testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil) + rayClusterProvisionedCondition = meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)) + assert.Equal(t, rayClusterProvisionedCondition.Status, metav1.ConditionTrue) + assert.Equal(t, rayClusterProvisionedCondition.Reason, rayv1.AllPodRunningAndReadyFirstTime) +} + +func TestStateTransitionTimes_NoStateChange(t *testing.T) { + setupTest(t) + + // Create a new scheme with CRDs, Pod, Service schemes. + newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + + // Mock data + headServiceIP := "aaa.bbb.ccc.ddd" + headService, err := common.BuildServiceForHeadPod(context.Background(), *testRayCluster, nil, nil) + assert.Nil(t, err, "Failed to build head service.") + headService.Spec.ClusterIP = headServiceIP + // headService.Spec.cont + headPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "headNode", + Namespace: namespaceStr, + Labels: map[string]string{ + utils.RayClusterLabelKey: instanceName, + utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + }, + }, + Status: corev1.PodStatus{ + PodIP: headNodeIP, + Phase: corev1.PodRunning, + }, + } + runtimeObjects := []runtime.Object{headPod, headService} + + // Initialize a fake client with newScheme and runtimeObjects. + fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + ctx := context.Background() + + // Initialize a RayCluster reconciler. 
+ r := &RayClusterReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + } + + preUpdateTime := metav1.Now() + testRayCluster.Status.State = rayv1.Ready //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + testRayCluster.Status.StateTransitionTimes = map[rayv1.ClusterState]*metav1.Time{rayv1.Ready: &preUpdateTime} + newInstance, err := r.calculateStatus(ctx, testRayCluster, nil) + assert.Nil(t, err) + assert.Equal(t, preUpdateTime, *newInstance.Status.StateTransitionTimes[rayv1.Ready], "Cluster state transition timestamp should not be updated") +} + +func Test_TerminatedWorkers_NoAutoscaler(t *testing.T) { + setupTest(t) + + // TODO (kevin85421): The tests in this file are not independent. As a workaround, + // I added the assertion to prevent the test logic from being affected by other changes. + // However, we should refactor the tests in the future. + + // This test makes some assumptions about the testRayCluster object. + // (1) 1 workerGroup + // (2) The goal state of the workerGroup is 3 replicas. + // (3) Set the `WorkersToDelete` field to an empty slice. + // (4) Disable autoscaling. + assert.Equal(t, 1, len(testRayCluster.Spec.WorkerGroupSpecs), "This test assumes only one worker group.") + expectedNumWorkerPods := int(*testRayCluster.Spec.WorkerGroupSpecs[0].Replicas) + assert.Equal(t, 3, expectedNumWorkerPods, "This test assumes the expected number of worker pods is 3.") + testRayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{} + testRayCluster.Spec.EnableInTreeAutoscaling = nil + + // This test makes some assumptions about the testPods object. + // `testPods` contains 6 pods, including 1 head pod and 5 worker pods. + assert.Equal(t, 6, len(testPods), "This test assumes the testPods object contains 6 pods.") + numHeadPods := 1 + oldNumWorkerPods := len(testPods) - numHeadPods + + // Initialize a fake client with newScheme and runtimeObjects. 
+ fakeClient := clientFake.NewClientBuilder().WithRuntimeObjects(testPods...).Build() + ctx := context.Background() + + // Get the pod list from the fake client. + podList := corev1.PodList{} + err := fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) + assert.Nil(t, err, "Fail to get pod list") + assert.Equal(t, oldNumWorkerPods+numHeadPods, len(podList.Items), "Init pod list len is wrong") + + // Make sure all worker Pods are running. + for _, pod := range podList.Items { + pod.Status.Phase = corev1.PodRunning + err = fakeClient.Status().Update(ctx, &pod) + assert.Nil(t, err, "Fail to update pod status") + } + + // Initialize a new RayClusterReconciler. + testRayClusterReconciler := &RayClusterReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), + } + + // Since the desired state of the workerGroup is 3 replicas, the controller + // will delete 2 worker Pods. + err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) + assert.Nil(t, err, "Fail to reconcile Pods") + + err = fakeClient.List(ctx, &podList, &client.ListOptions{ + LabelSelector: workerSelector, + Namespace: namespaceStr, + }) + assert.Nil(t, err, "Fail to get Pod list after reconcile") + assert.Equal(t, expectedNumWorkerPods, len(podList.Items)) + + // Update 1 worker Pod to Failed (a terminate state) state. + podList.Items[0].Status.Phase = corev1.PodFailed + err = fakeClient.Status().Update(ctx, &podList.Items[0]) + assert.Nil(t, err, "Fail to update Pod status") + + // Reconcile again, and the Failed worker Pod should be deleted even if the goal state of the workerGroup specifies 3 replicas. + // The function will return an error to requeue the request after a brief delay. Moreover, if there are unhealthy worker + // Pods to be deleted, the controller won't create new worker Pods during the same reconcile loop. 
As a result, the number of worker + // Pods will be (expectedNumWorkerPods - 1) after the reconcile loop. + err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) + assert.NotNil(t, err) + err = fakeClient.List(ctx, &podList, &client.ListOptions{ + LabelSelector: workerSelector, + Namespace: namespaceStr, + }) + assert.Nil(t, err, "Fail to get Pod list after reconcile") + assert.Equal(t, expectedNumWorkerPods-1, len(podList.Items)) // Reconcile again, and the controller will create a new worker Pod to reach the goal state of the workerGroup. // Note that the status of new worker Pod created by the fake client is empty, so we need to set all worker @@ -1813,23 +2345,38 @@ func Test_TerminatedHead_RestartPolicy(t *testing.T) { // Initialize a new RayClusterReconciler. testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: newScheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: newScheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } - // The head Pod will not be deleted because the restart policy is `Always`. + // The head Pod will be deleted regardless restart policy. + err = testRayClusterReconciler.reconcilePods(ctx, cluster) + assert.NotNil(t, err) + err = fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) + assert.Nil(t, err, "Fail to get pod list") + assert.Equal(t, 0, len(podList.Items)) + + // The new head Pod will be created in this reconcile loop. err = testRayClusterReconciler.reconcilePods(ctx, cluster) assert.Nil(t, err) err = fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) assert.Nil(t, err, "Fail to get pod list") assert.Equal(t, 1, len(podList.Items)) - // Make sure the head Pod's restart policy is `Never` and status is `Failed`. + // Make sure the head Pod's restart policy is `Never` and status is `Running`. 
podList.Items[0].Spec.RestartPolicy = corev1.RestartPolicyNever err = fakeClient.Update(ctx, &podList.Items[0]) assert.Nil(t, err) - podList.Items[0].Status.Phase = corev1.PodFailed + podList.Items[0].Status.Phase = corev1.PodRunning + podList.Items[0].Status.ContainerStatuses = append(podList.Items[0].Status.ContainerStatuses, + corev1.ContainerStatus{ + Name: podList.Items[0].Spec.Containers[utils.RayContainerIndex].Name, + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{}, + }, + }) err = fakeClient.Status().Update(ctx, &podList.Items[0]) assert.Nil(t, err) @@ -1896,9 +2443,10 @@ func Test_RunningPods_RayContainerTerminated(t *testing.T) { // Initialize a new RayClusterReconciler. testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: newScheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: newScheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } // The head Pod will be deleted and the controller will return an error @@ -1936,12 +2484,11 @@ func Test_ShouldDeletePod(t *testing.T) { }{ { // The restart policy is `Always` and the Pod is in a terminate state. - // The expected behavior is that the controller will not delete the Pod because - // the restart policy is `Always`. - name: "restartPolicy=Always, phase=PodFailed, shouldDelete=false", + // The expected behavior is that the controller will delete the Pod regardless of the restart policy. 
+ name: "restartPolicy=Always, phase=PodFailed, shouldDelete=true", restartPolicy: corev1.RestartPolicyAlways, phase: corev1.PodFailed, - shouldDelete: false, + shouldDelete: true, }, { // The restart policy is `Always`, the Pod is not in a terminate state, @@ -2099,9 +2646,10 @@ func Test_RedisCleanupFeatureFlag(t *testing.T) { // Initialize the reconciler testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: newScheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: newScheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } rayClusterList := rayv1.RayClusterList{} @@ -2110,13 +2658,9 @@ func Test_RedisCleanupFeatureFlag(t *testing.T) { assert.Equal(t, 1, len(rayClusterList.Items)) assert.Equal(t, 0, len(rayClusterList.Items[0].Finalizers)) - request := ctrl.Request{NamespacedName: types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}} - _, err = testRayClusterReconciler.rayClusterReconcile(ctx, request, cluster) + _, err = testRayClusterReconciler.rayClusterReconcile(ctx, cluster) if tc.enableGCSFTRedisCleanup == "false" { - // No finalizer should be added to the RayCluster. The head service and Ray Pods should be created. - // The head service's ClusterIP is empty, so the function `getHeadServiceIP` will return an error - // to requeue the request when it tries to update the RayCluster's status. - assert.NotNil(t, err) + assert.Nil(t, err) podList := corev1.PodList{} err = fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) assert.Nil(t, err) @@ -2135,24 +2679,106 @@ func Test_RedisCleanupFeatureFlag(t *testing.T) { if tc.expectedNumFinalizers > 0 { assert.True(t, controllerutil.ContainsFinalizer(&rayClusterList.Items[0], utils.GCSFaultToleranceRedisCleanupFinalizer)) - // No Pod should be created before adding the GCS FT Redis cleanup finalizer. 
- podList := corev1.PodList{} - err = fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) - assert.Nil(t, err, "Fail to get Pod list") - assert.Equal(t, 0, len(podList.Items)) + // No Pod should be created before adding the GCS FT Redis cleanup finalizer. + podList := corev1.PodList{} + err = fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) + assert.Nil(t, err, "Fail to get Pod list") + assert.Equal(t, 0, len(podList.Items)) + + // Reconcile the RayCluster again. The controller should create Pods. + _, err = testRayClusterReconciler.rayClusterReconcile(ctx, cluster) + assert.Nil(t, err) + + err = fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) + assert.Nil(t, err, "Fail to get Pod list") + assert.NotEqual(t, 0, len(podList.Items)) + } + }) + } +} + +func TestEvents_RedisCleanup(t *testing.T) { + setupTest(t) + newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + _ = batchv1.AddToScheme(newScheme) + + // Prepare a RayCluster with the GCS FT enabled and Autoscaling disabled. + gcsFTEnabledCluster := testRayCluster.DeepCopy() + if gcsFTEnabledCluster.Annotations == nil { + gcsFTEnabledCluster.Annotations = make(map[string]string) + } + gcsFTEnabledCluster.Annotations[utils.RayFTEnabledAnnotationKey] = "true" + gcsFTEnabledCluster.Spec.EnableInTreeAutoscaling = nil + + // Add the Redis cleanup finalizer to the RayCluster and modify the RayCluster's DeleteTimestamp to trigger the Redis cleanup. + controllerutil.AddFinalizer(gcsFTEnabledCluster, utils.GCSFaultToleranceRedisCleanupFinalizer) + now := metav1.Now() + gcsFTEnabledCluster.DeletionTimestamp = &now + errInjected := errors.New("random error") + + tests := map[string]struct { + fakeClientFn func(client.Object) client.Client + errInjected error + }{ + "Created Redis cleanup Job": { + fakeClientFn: func(obj client.Object) client.Client { + return clientFake.NewClientBuilder(). + WithScheme(newScheme). 
+ WithRuntimeObjects([]runtime.Object{obj}...). + Build() + }, + errInjected: nil, + }, + "Failed to create Redis cleanup Job": { + fakeClientFn: func(obj client.Object) client.Client { + return clientFake.NewClientBuilder(). + WithScheme(newScheme). + WithRuntimeObjects([]runtime.Object{obj}...). + WithInterceptorFuncs(interceptor.Funcs{ + Create: func(_ context.Context, _ client.WithWatch, _ client.Object, _ ...client.CreateOption) error { + return errInjected + }, + }). + Build() + }, + errInjected: errInjected, + }, + } + + for message, tc := range tests { + t.Run(message, func(t *testing.T) { + cluster := gcsFTEnabledCluster.DeepCopy() + ctx := context.Background() - // Reconcile the RayCluster again. The controller should create Pods. - _, err = testRayClusterReconciler.rayClusterReconcile(ctx, request, cluster) + fakeClient := tc.fakeClientFn(cluster) - // The head service and Ray Pods should be created. The head service's ClusterIP is empty, - // so the function `getHeadServiceIP` will return an error to requeue the request when it - // tries to update the RayCluster's status. - assert.NotNil(t, err) + // Buffer length of 100 is arbitrary here. We should have only 1 event generated, but we keep 100 + // if that isn't the case in the future. If this test starts timing out because of a full + // channel, this is probably the reason, and we should change our approach or increase buffer length. 
+ recorder := record.NewFakeRecorder(100) - err = fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) - assert.Nil(t, err, "Fail to get Pod list") - assert.NotEqual(t, 0, len(podList.Items)) + testRayClusterReconciler := &RayClusterReconciler{ + Client: fakeClient, + Recorder: recorder, + Scheme: newScheme, + } + + _, err := testRayClusterReconciler.rayClusterReconcile(ctx, cluster) + assert.ErrorIs(t, err, tc.errInjected) + + var foundEvent bool + var events []string + for len(recorder.Events) > 0 { + event := <-recorder.Events + if strings.Contains(event, message) { + foundEvent = true + break + } + events = append(events, event) } + assert.Truef(t, foundEvent, "Expected event to be generated for redis cleanup job creation, got events: %s", strings.Join(events, "\n")) }) } } @@ -2282,8 +2908,7 @@ func Test_RedisCleanup(t *testing.T) { assert.Nil(t, err, "Fail to get Job list") assert.Equal(t, 0, len(jobList.Items)) - request := ctrl.Request{NamespacedName: types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}} - _, err = testRayClusterReconciler.rayClusterReconcile(ctx, request, cluster) + _, err = testRayClusterReconciler.rayClusterReconcile(ctx, cluster) assert.Nil(t, err) // Check Job @@ -2310,7 +2935,7 @@ func Test_RedisCleanup(t *testing.T) { // Reconcile the RayCluster again. The controller should remove the finalizer and the RayCluster will be deleted. // See https://github.com/kubernetes-sigs/controller-runtime/blob/release-0.11/pkg/client/fake/client.go#L308-L310 for more details. 
- _, err = testRayClusterReconciler.rayClusterReconcile(ctx, request, cluster) + _, err = testRayClusterReconciler.rayClusterReconcile(ctx, cluster) assert.Nil(t, err, "Fail to reconcile RayCluster") err = fakeClient.List(ctx, &rayClusterList, client.InNamespace(namespaceStr)) assert.Nil(t, err, "Fail to get RayCluster list") @@ -2328,7 +2953,7 @@ func TestReconcile_Replicas_Optional(t *testing.T) { assert.Equal(t, 1, len(testRayCluster.Spec.WorkerGroupSpecs), "This test assumes only one worker group.") // Disable autoscaling so that the random Pod deletion is enabled. - testRayCluster.Spec.EnableInTreeAutoscaling = pointer.Bool(false) + testRayCluster.Spec.EnableInTreeAutoscaling = ptr.To(false) testRayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{} tests := map[string]struct { @@ -2341,22 +2966,22 @@ func TestReconcile_Replicas_Optional(t *testing.T) { // If `Replicas` is nil, the controller will set the desired state of the workerGroup to `MinReplicas` Pods. // [Note]: It is not possible for `Replicas` to be nil in practice because it has a default value in the CRD. replicas: nil, - minReplicas: pointer.Int32(1), - maxReplicas: pointer.Int32(10000), + minReplicas: ptr.To[int32](1), + maxReplicas: ptr.To[int32](10000), desiredReplicas: 1, }, "Replicas is smaller than MinReplicas": { // If `Replicas` is smaller than `MinReplicas`, the controller will set the desired state of the workerGroup to `MinReplicas` Pods. - replicas: pointer.Int32(0), - minReplicas: pointer.Int32(1), - maxReplicas: pointer.Int32(10000), + replicas: ptr.To[int32](0), + minReplicas: ptr.To[int32](1), + maxReplicas: ptr.To[int32](10000), desiredReplicas: 1, }, "Replicas is larger than MaxReplicas": { // If `Replicas` is larger than `MaxReplicas`, the controller will set the desired state of the workerGroup to `MaxReplicas` Pods. 
- replicas: pointer.Int32(4), - minReplicas: pointer.Int32(1), - maxReplicas: pointer.Int32(3), + replicas: ptr.To[int32](4), + minReplicas: ptr.To[int32](1), + maxReplicas: ptr.To[int32](3), desiredReplicas: 3, }, } @@ -2386,9 +3011,10 @@ func TestReconcile_Replicas_Optional(t *testing.T) { // Initialize a new RayClusterReconciler. testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } // Since the desired state of the workerGroup is 1 replica, @@ -2416,7 +3042,7 @@ func TestReconcile_Multihost_Replicas(t *testing.T) { // Disable autoscaling so that the random Pod deletion is enabled. // Set `NumOfHosts` to 4 to specify multi-host group - testRayCluster.Spec.EnableInTreeAutoscaling = pointer.Bool(false) + testRayCluster.Spec.EnableInTreeAutoscaling = ptr.To(false) testRayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{} testRayCluster.Spec.WorkerGroupSpecs[0].NumOfHosts = 4 @@ -2430,24 +3056,24 @@ func TestReconcile_Multihost_Replicas(t *testing.T) { "Replicas is nil": { // If `Replicas` is nil, the controller will set the desired state of the workerGroup to `MinReplicas`*`NumOfHosts` Pods. replicas: nil, - minReplicas: pointer.Int32(1), - maxReplicas: pointer.Int32(10000), + minReplicas: ptr.To[int32](1), + maxReplicas: ptr.To[int32](10000), desiredReplicas: 1, numOfHosts: 4, }, "Replicas is smaller than MinReplicas": { // If `Replicas` is smaller than `MinReplicas`, the controller will set the desired state of the workerGroup to `MinReplicas`*`NumOfHosts` Pods. 
- replicas: pointer.Int32(0), - minReplicas: pointer.Int32(1), - maxReplicas: pointer.Int32(10000), + replicas: ptr.To[int32](0), + minReplicas: ptr.To[int32](1), + maxReplicas: ptr.To[int32](10000), desiredReplicas: 1, numOfHosts: 4, }, "Replicas is larger than MaxReplicas": { // If `Replicas` is larger than `MaxReplicas`, the controller will set the desired state of the workerGroup to `MaxReplicas`*`NumOfHosts` Pods. - replicas: pointer.Int32(4), - minReplicas: pointer.Int32(1), - maxReplicas: pointer.Int32(3), + replicas: ptr.To[int32](4), + minReplicas: ptr.To[int32](1), + maxReplicas: ptr.To[int32](3), desiredReplicas: 3, numOfHosts: 4, }, @@ -2478,9 +3104,10 @@ func TestReconcile_Multihost_Replicas(t *testing.T) { // Initialize a new RayClusterReconciler. testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } // Since the desired state of the workerGroup is 1 replica, @@ -2508,9 +3135,9 @@ func TestReconcile_NumOfHosts(t *testing.T) { // Disable autoscaling so that the random Pod deletion is enabled. // Set `Replicas` to 1 and clear `WorkersToDelete` - testRayCluster.Spec.EnableInTreeAutoscaling = pointer.Bool(false) + testRayCluster.Spec.EnableInTreeAutoscaling = ptr.To(false) testRayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{} - testRayCluster.Spec.WorkerGroupSpecs[0].Replicas = pointer.Int32(1) + testRayCluster.Spec.WorkerGroupSpecs[0].Replicas = ptr.To[int32](1) tests := map[string]struct { replicas *int32 @@ -2518,12 +3145,12 @@ func TestReconcile_NumOfHosts(t *testing.T) { }{ "NumOfHosts is 1": { // If `NumOfHosts` is 1, the controller will set the desired state of the workerGroup to `Replicas` Pods. 
- replicas: pointer.Int32(1), + replicas: ptr.To[int32](1), numOfHosts: 1, }, "NumOfHosts is larger than 1": { // If `NumOfHosts` is larger than 1, the controller will set the desired state of the workerGroup to `NumOfHosts` Pods. - replicas: pointer.Int32(1), + replicas: ptr.To[int32](1), numOfHosts: 4, }, } @@ -2546,9 +3173,10 @@ func TestReconcile_NumOfHosts(t *testing.T) { // Initialize a new RayClusterReconciler. testRayClusterReconciler := &RayClusterReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), } err = testRayClusterReconciler.reconcilePods(ctx, cluster) @@ -2670,3 +3298,537 @@ func TestDeleteAllPods(t *testing.T) { assert.Equal(t, 2, len(pods.Items)) assert.Subset(t, []string{"deleted", "other"}, []string{pods.Items[0].Name, pods.Items[1].Name}) } + +func TestEvents_FailedPodCreation(t *testing.T) { + tests := []struct { + errInject error + // simulate is responsible for simulating pod deletions in different scenarios. + simulate func(ctx context.Context, t *testing.T, podList corev1.PodList, client client.WithWatch) + name string + failureMsg string + podType string + }{ + { + errInject: utils.ErrFailedCreateWorkerPod, + simulate: func(ctx context.Context, t *testing.T, podList corev1.PodList, client client.WithWatch) { + // Simulate the deletion of 3 worker Pods. After the deletion, the number of worker Pods should be 2.
+ err := client.Delete(ctx, &podList.Items[2]) + assert.Nil(t, err, "Fail to delete pod") + err = client.Delete(ctx, &podList.Items[3]) + assert.Nil(t, err, "Fail to delete pod") + err = client.Delete(ctx, &podList.Items[4]) + assert.Nil(t, err, "Fail to delete pod") + }, + name: "failure event for failed worker pod creation", + failureMsg: "Failed to create worker Pod", + podType: "worker", + }, + { + errInject: utils.ErrFailedCreateHeadPod, + simulate: func(ctx context.Context, t *testing.T, podList corev1.PodList, client client.WithWatch) { + // Simulate the deletion of head pod + err := client.Delete(ctx, &podList.Items[0]) + assert.Nil(t, err, "Fail to delete pod") + }, + name: "failure event for failed head pod creation", + failureMsg: "Failed to create head Pod", + podType: "head", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + setupTest(t) + + // TODO (kevin85421): The tests in this file are not independent. As a workaround, + // I added the assertion to prevent the test logic from being affected by other changes. + // However, we should refactor the tests in the future. + + // This test makes some assumptions about the testRayCluster object. + // (1) 1 workerGroup (2) The goal state of the workerGroup is 3 replicas. (3) Set the workersToDelete to empty. + assert.Equal(t, 1, len(testRayCluster.Spec.WorkerGroupSpecs), "This test assumes only one worker group.") + testRayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{} + expectedNumWorkerPods := int(*testRayCluster.Spec.WorkerGroupSpecs[0].Replicas) + assert.Equal(t, 3, expectedNumWorkerPods, "This test assumes the expected number of worker pods is 3.") + + // This test makes some assumptions about the testPods object. + // `testPods` contains 6 pods, including 1 head pod and 5 worker pods. 
+ assert.Equal(t, 6, len(testPods), "This test assumes the testPods object contains 6 pods.") + numHeadPods := 1 + oldNumWorkerPods := len(testPods) - numHeadPods + + // Initialize a fake client with newScheme and runtimeObjects. + // We create a fake client with an interceptor for Create() in order to simulate a failure for pod creation. + // The interceptor returns the error injected by the test case (test.errInject), so the Pod creation that + // reconciliation attempts after the simulated Pod deletion below will fail. + fakeClient := clientFake.NewClientBuilder().WithInterceptorFuncs(interceptor.Funcs{ + Create: func(_ context.Context, _ client.WithWatch, _ client.Object, _ ...client.CreateOption) error { + return test.errInject + }, + }).WithRuntimeObjects(testPods...).Build() + ctx := context.Background() + + // Get the pod list from the fake client. + podList := corev1.PodList{} + err := fakeClient.List(ctx, &podList, client.InNamespace(namespaceStr)) + assert.Nil(t, err, "Fail to get pod list") + assert.Equal(t, oldNumWorkerPods+numHeadPods, len(podList.Items), "Init pod list len is wrong") + + test.simulate(ctx, t, podList, fakeClient) + + // Buffer length of 100 is arbitrary here. We should have only 1 event generated, but we keep 100 + // if that isn't the case in the future. If this test starts timing out because of a full + // channel, this is probably the reason and we should change our approach or increase buffer length. + recorder := record.NewFakeRecorder(100) + + // Initialize a new RayClusterReconciler. + testRayClusterReconciler := &RayClusterReconciler{ + Client: fakeClient, + Recorder: recorder, + Scheme: scheme.Scheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), + } + + // Since the desired state of the workerGroup is 3 replicas, + // the controller will try to create one worker pod.
+ err = testRayClusterReconciler.reconcilePods(ctx, testRayCluster) + // We should get an error here because of simulating a pod creation failure. + assert.NotNil(t, err, "unexpected error") + + var foundFailureEvent bool + events := []string{} + for len(recorder.Events) > 0 { + event := <-recorder.Events + if strings.Contains(event, test.failureMsg) { + foundFailureEvent = true + break + } + events = append(events, event) + } + + assert.Truef(t, foundFailureEvent, "Expected event to be generated for %s pod creation failure, got events: %s", test.podType, strings.Join(events, "\n")) + }) + } +} + +func Test_ReconcileManagedBy(t *testing.T) { + setupTest(t) + newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + _ = corev1.AddToScheme(newScheme) + _ = batchv1.AddToScheme(newScheme) + + tests := map[string]struct { + managedBy *string + shouldReconcile bool + }{ + "ManagedBy field not set": { + managedBy: nil, + shouldReconcile: true, + }, + "ManagedBy field to RayOperator": { + managedBy: ptr.To(utils.KubeRayController), + shouldReconcile: true, + }, + "ManagedBy field empty": { + managedBy: ptr.To(""), + }, + "ManagedBy field to external allowed controller": { + managedBy: ptr.To(MultiKueueController), + }, + "ManagedBy field to external not allowed controller": { + managedBy: ptr.To("controller.com/invalid"), + }, + } + + for name, tc := range tests { + name, tc := name, tc + t.Run(name, func(t *testing.T) { + ctx := context.Background() + cluster := testRayCluster.DeepCopy() + cluster.Spec.EnableInTreeAutoscaling = ptr.To(false) + cluster.Status = rayv1.RayClusterStatus{} + cluster.Spec.ManagedBy = tc.managedBy + runtimeObjects := []runtime.Object{cluster} + fakeClient := clientFake.NewClientBuilder(). + WithScheme(newScheme). + WithRuntimeObjects(runtimeObjects...). + WithStatusSubresource(cluster). 
+ Build() + testRayClusterReconciler := &RayClusterReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: newScheme, + rayClusterScaleExpectation: expectations.NewRayClusterScaleExpectation(fakeClient), + } + + result, err := testRayClusterReconciler.rayClusterReconcile(ctx, cluster) + assert.Nil(t, err) + if tc.shouldReconcile { + // finish with requeue due to detected inconsistency + assert.Equal(t, result.RequeueAfter.Seconds(), DefaultRequeueDuration.Seconds()) + } else { + // skip reconciliation + assert.Equal(t, result.RequeueAfter.Seconds(), time.Duration(0).Seconds()) + } + }) + } +} + +func TestValidateRayClusterSpecGcsFaultToleranceOptions(t *testing.T) { + errorMessageBothSet := fmt.Sprintf("%s annotation and GcsFaultToleranceOptions are both set. "+ + "Please use only GcsFaultToleranceOptions to configure GCS fault tolerance", utils.RayFTEnabledAnnotationKey) + errorMessageRedisAddressSet := fmt.Sprintf("%s is set which implicitly enables GCS fault tolerance, "+ + "but GcsFaultToleranceOptions is not set. Please set GcsFaultToleranceOptions "+ + "to enable GCS fault tolerance", utils.RAY_REDIS_ADDRESS) + errorMessageRedisAddressConflict := fmt.Sprintf("cannot set `%s` env var in head Pod when "+ + "GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisAddress instead", utils.RAY_REDIS_ADDRESS) + errorMessageExternalStorageNamespaceConflict := fmt.Sprintf("cannot set `%s` annotation when "+ + "GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.ExternalStorageNamespace instead", utils.RayExternalStorageNSAnnotationKey) + + tests := []struct { + gcsFaultToleranceOptions *rayv1.GcsFaultToleranceOptions + annotations map[string]string + name string + errorMessage string + envVars []corev1.EnvVar + expectError bool + }{ + // GcsFaultToleranceOptions and ray.io/ft-enabled should not be both set.
+ { + name: "ray.io/ft-enabled is set to false and GcsFaultToleranceOptions is set", + annotations: map[string]string{ + utils.RayFTEnabledAnnotationKey: "false", + }, + gcsFaultToleranceOptions: &rayv1.GcsFaultToleranceOptions{}, + expectError: true, + errorMessage: errorMessageBothSet, + }, + { + name: "ray.io/ft-enabled is set to true and GcsFaultToleranceOptions is set", + annotations: map[string]string{ + utils.RayFTEnabledAnnotationKey: "true", + }, + gcsFaultToleranceOptions: &rayv1.GcsFaultToleranceOptions{}, + expectError: true, + errorMessage: errorMessageBothSet, + }, + { + name: "ray.io/ft-enabled is not set and GcsFaultToleranceOptions is set", + gcsFaultToleranceOptions: &rayv1.GcsFaultToleranceOptions{}, + expectError: false, + }, + { + name: "ray.io/ft-enabled is not set and GcsFaultToleranceOptions is not set", + gcsFaultToleranceOptions: nil, + expectError: false, + }, + // RAY_REDIS_ADDRESS should not be set if KubeRay is not aware that GCS fault tolerance is enabled. 
+ { + name: "ray.io/ft-enabled is set to false and RAY_REDIS_ADDRESS is set", + annotations: map[string]string{ + utils.RayFTEnabledAnnotationKey: "false", + }, + envVars: []corev1.EnvVar{ + { + Name: utils.RAY_REDIS_ADDRESS, + Value: "redis:6379", + }, + }, + expectError: true, + errorMessage: errorMessageRedisAddressSet, + }, + { + name: "gcsFaultToleranceOptions is set and RAY_REDIS_ADDRESS is set", + envVars: []corev1.EnvVar{ + { + Name: utils.RAY_REDIS_ADDRESS, + Value: "redis:6379", + }, + }, + gcsFaultToleranceOptions: &rayv1.GcsFaultToleranceOptions{}, + expectError: true, + errorMessage: errorMessageRedisAddressConflict, + }, + { + name: "FT is disabled and RAY_REDIS_ADDRESS is set", + envVars: []corev1.EnvVar{ + { + Name: utils.RAY_REDIS_ADDRESS, + Value: "redis:6379", + }, + }, + expectError: true, + errorMessage: errorMessageRedisAddressSet, + }, + { + name: "ray.io/ft-enabled is set to true and RAY_REDIS_ADDRESS is set", + annotations: map[string]string{ + utils.RayFTEnabledAnnotationKey: "true", + }, + envVars: []corev1.EnvVar{ + { + Name: utils.RAY_REDIS_ADDRESS, + Value: "redis:6379", + }, + }, + expectError: false, + }, + { + name: "gcsFaultToleranceOptions is set and ray.io/external-storage-namespace is set", + annotations: map[string]string{ + utils.RayExternalStorageNSAnnotationKey: "myns", + }, + gcsFaultToleranceOptions: &rayv1.GcsFaultToleranceOptions{}, + expectError: true, + errorMessage: errorMessageExternalStorageNamespaceConflict, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rayCluster := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: tt.annotations, + }, + Spec: rayv1.RayClusterSpec{ + GcsFaultToleranceOptions: tt.gcsFaultToleranceOptions, + HeadGroupSpec: rayv1.HeadGroupSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Env: tt.envVars, + }, + }, + }, + }, + }, + }, + } + err := validateRayClusterSpec(rayCluster) + if 
tt.expectError { + assert.Error(t, err) + assert.EqualError(t, err, tt.errorMessage) + } else { + assert.Nil(t, err) + } + }) + } +} + +func TestValidateRayClusterSpecRedisPassword(t *testing.T) { + tests := []struct { + gcsFaultToleranceOptions *rayv1.GcsFaultToleranceOptions + name string + rayStartParams map[string]string + envVars []corev1.EnvVar + expectError bool + }{ + { + name: "GcsFaultToleranceOptions is set and `redis-password` is also set in rayStartParams", + gcsFaultToleranceOptions: &rayv1.GcsFaultToleranceOptions{}, + rayStartParams: map[string]string{ + "redis-password": "password", + }, + expectError: true, + }, + { + name: "GcsFaultToleranceOptions is set and `REDIS_PASSWORD` env var is also set in the head Pod", + gcsFaultToleranceOptions: &rayv1.GcsFaultToleranceOptions{}, + envVars: []corev1.EnvVar{ + { + Name: utils.REDIS_PASSWORD, + Value: "password", + }, + }, + expectError: true, + }, + { + name: "GcsFaultToleranceOptions.RedisPassword is set", + gcsFaultToleranceOptions: &rayv1.GcsFaultToleranceOptions{ + RedisPassword: &rayv1.RedisCredential{ + Value: "password", + }, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rayCluster := &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + GcsFaultToleranceOptions: tt.gcsFaultToleranceOptions, + HeadGroupSpec: rayv1.HeadGroupSpec{ + RayStartParams: tt.rayStartParams, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Env: tt.envVars, + }, + }, + }, + }, + }, + }, + } + err := validateRayClusterSpec(rayCluster) + if tt.expectError { + assert.Error(t, err) + } else { + assert.Nil(t, err) + } + }) + } +} + +func TestValidateRayClusterSpecEmptyContainers(t *testing.T) { + headGroupSpecWithOneContainer := rayv1.HeadGroupSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "ray-head"}}, + }, + }, + } + workerGroupSpecWithOneContainer := 
rayv1.WorkerGroupSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "ray-worker"}}, + }, + }, + } + headGroupSpecWithNoContainers := *headGroupSpecWithOneContainer.DeepCopy() + headGroupSpecWithNoContainers.Template.Spec.Containers = []corev1.Container{} + workerGroupSpecWithNoContainers := *workerGroupSpecWithOneContainer.DeepCopy() + workerGroupSpecWithNoContainers.Template.Spec.Containers = []corev1.Container{} + + tests := []struct { + rayCluster *rayv1.RayCluster + name string + errorMessage string + expectError bool + }{ + { + name: "headGroupSpec has no containers", + rayCluster: &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: headGroupSpecWithNoContainers, + }, + }, + expectError: true, + errorMessage: "headGroupSpec should have at least one container", + }, + { + name: "workerGroupSpec has no containers", + rayCluster: &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: headGroupSpecWithOneContainer, + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{workerGroupSpecWithNoContainers}, + }, + }, + expectError: true, + errorMessage: "workerGroupSpec should have at least one container", + }, + { + name: "valid cluster with containers in both head and worker groups", + rayCluster: &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: headGroupSpecWithOneContainer, + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{workerGroupSpecWithOneContainer}, + }, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateRayClusterSpec(tt.rayCluster) + if tt.expectError { + assert.Error(t, err) + assert.EqualError(t, err, tt.errorMessage) + } else { + assert.Nil(t, err) + } + }) + } +} + +func TestValidateRayClusterSpecSuspendingWorkerGroup(t *testing.T) { + headGroupSpec := rayv1.HeadGroupSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "ray-head"}}, + }, + }, + } + 
workerGroupSpecSuspended := rayv1.WorkerGroupSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "ray-worker"}}, + }, + }, + } + workerGroupSpecSuspended.Suspend = ptr.To[bool](true) + + tests := []struct { + rayCluster *rayv1.RayCluster + name string + errorMessage string + expectError bool + featureGate bool + }{ + { + name: "suspend without autoscaler and the feature gate", + rayCluster: &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: headGroupSpec, + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{workerGroupSpecSuspended}, + }, + }, + featureGate: false, + expectError: true, + errorMessage: "suspending worker groups is currently available when the RayJobDeletionPolicy feature gate is enabled", + }, + { + name: "suspend without autoscaler", + rayCluster: &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: headGroupSpec, + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{workerGroupSpecSuspended}, + }, + }, + featureGate: true, + expectError: false, + }, + { + // TODO (rueian): This can be supported in future Ray. We should check the RayVersion once we know the version. 
+ name: "suspend with autoscaler", + rayCluster: &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: headGroupSpec, + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{workerGroupSpecSuspended}, + EnableInTreeAutoscaling: ptr.To[bool](true), + }, + }, + featureGate: true, + expectError: true, + errorMessage: "suspending worker groups is not currently supported with Autoscaler enabled", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + defer features.SetFeatureGateDuringTest(t, features.RayJobDeletionPolicy, tt.featureGate)() + err := validateRayClusterSpec(tt.rayCluster) + if tt.expectError { + assert.Error(t, err) + assert.EqualError(t, err, tt.errorMessage) + } else { + assert.Nil(t, err) + } + }) + } +} diff --git a/ray-operator/controllers/ray/rayjob_controller.go b/ray-operator/controllers/ray/rayjob_controller.go index ff4095efd42..7a2ac41f715 100644 --- a/ray-operator/controllers/ray/rayjob_controller.go +++ b/ray-operator/controllers/ray/rayjob_controller.go @@ -3,6 +3,8 @@ package ray import ( "context" "fmt" + "os" + "strings" "time" "github.com/go-logr/logr" @@ -11,12 +13,13 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/manager" "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + "github.com/ray-project/kuberay/ray-operator/pkg/features" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" @@ -43,7 +46,8 @@ type RayJobReconciler struct { } // NewRayJobReconciler returns a new reconcile.Reconciler -func NewRayJobReconciler(ctx context.Context, mgr manager.Manager, dashboardClientFunc func() utils.RayDashboardClientInterface) *RayJobReconciler { +func NewRayJobReconciler(_ context.Context, mgr 
manager.Manager, provider utils.ClientProvider) *RayJobReconciler { + dashboardClientFunc := provider.GetDashboardClient(mgr) return &RayJobReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), @@ -60,6 +64,7 @@ func NewRayJobReconciler(ctx context.Context, mgr manager.Manager, dashboardClie // +kubebuilder:rbac:groups=core,resources=pods/status,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=services/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=core,resources=services/proxy,verbs=get;update;patch;create // +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update // +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;delete // +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles,verbs=get;list;watch;create;delete;update @@ -80,7 +85,7 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) if err := r.Get(ctx, request.NamespacedName, rayJobInstance); err != nil { if errors.IsNotFound(err) { // Request object not found, could have been deleted after reconcile request. Stop reconciliation. - logger.Info("RayJob resource not found. Ignoring since object must be deleted", "name", request.NamespacedName) + logger.Info("RayJob resource not found. Ignoring since object must be deleted") return ctrl.Result{}, nil } // Error reading the object - requeue the request. 
@@ -88,23 +93,34 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err } + if manager := utils.ManagedByExternalController(rayJobInstance.Spec.ManagedBy); manager != nil { + logger.Info("Skipping RayJob managed by a custom controller", "managed-by", manager) + return ctrl.Result{}, nil + } + if !rayJobInstance.ObjectMeta.DeletionTimestamp.IsZero() { logger.Info("RayJob is being deleted", "DeletionTimestamp", rayJobInstance.ObjectMeta.DeletionTimestamp) // If the JobStatus is not terminal, it is possible that the Ray job is still running. This includes // the case where JobStatus is JobStatusNew. if !rayv1.IsJobTerminal(rayJobInstance.Status.JobStatus) { + rayClusterNamespacedName := common.RayJobRayClusterNamespacedName(rayJobInstance) + rayClusterInstance := &rayv1.RayCluster{} + if err := r.Get(ctx, rayClusterNamespacedName, rayClusterInstance); err != nil { + logger.Error(err, "Failed to get RayCluster") + } + rayDashboardClient := r.dashboardClientFunc() - rayDashboardClient.InitClient(rayJobInstance.Status.DashboardURL) - err := rayDashboardClient.StopJob(ctx, rayJobInstance.Status.JobId) - if err != nil { - logger.Info("Failed to stop job for RayJob", "error", err) + if err := rayDashboardClient.InitClient(ctx, rayJobInstance.Status.DashboardURL, rayClusterInstance); err != nil { + logger.Error(err, "Failed to initialize dashboard client") + } + if err := rayDashboardClient.StopJob(ctx, rayJobInstance.Status.JobId); err != nil { + logger.Error(err, "Failed to stop job for RayJob") } } logger.Info("Remove the finalizer no matter StopJob() succeeds or not.", "finalizer", utils.RayJobStopJobFinalizer) controllerutil.RemoveFinalizer(rayJobInstance, utils.RayJobStopJobFinalizer) - err := r.Update(ctx, rayJobInstance) - if err != nil { + if err := r.Update(ctx, rayJobInstance); err != nil { logger.Error(err, "Failed to remove finalizer for RayJob") return 
ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err } @@ -113,13 +129,22 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) if err := validateRayJobSpec(rayJobInstance); err != nil { logger.Error(err, "The RayJob spec is invalid") + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeWarning, string(utils.InvalidRayJobSpec), + "The RayJob spec is invalid %s/%s: %v", rayJobInstance.Namespace, rayJobInstance.Name, err) + return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err + } + + if err := validateRayJobStatus(rayJobInstance); err != nil { + logger.Error(err, "The RayJob status is invalid") + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeWarning, string(utils.InvalidRayJobStatus), + "The RayJob status is invalid %s/%s: %v", rayJobInstance.Namespace, rayJobInstance.Name, err) return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err } // Please do NOT modify `originalRayJobInstance` in the following code. originalRayJobInstance := rayJobInstance.DeepCopy() - logger.Info("RayJob", "name", rayJobInstance.Name, "namespace", rayJobInstance.Namespace, "JobStatus", rayJobInstance.Status.JobStatus, "JobDeploymentStatus", rayJobInstance.Status.JobDeploymentStatus, "SubmissionMode", rayJobInstance.Spec.SubmissionMode) + logger.Info("RayJob", "JobStatus", rayJobInstance.Status.JobStatus, "JobDeploymentStatus", rayJobInstance.Status.JobDeploymentStatus, "SubmissionMode", rayJobInstance.Spec.SubmissionMode) switch rayJobInstance.Status.JobDeploymentStatus { case rayv1.JobDeploymentStatusNew: if !controllerutil.ContainsFinalizer(rayJobInstance, utils.RayJobStopJobFinalizer) { @@ -132,16 +157,16 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) } // Set `Status.JobDeploymentStatus` to `JobDeploymentStatusInitializing`, and initialize `Status.JobId` // and `Status.RayClusterName` prior to avoid duplicate job submissions and cluster creations. 
- logger.Info("JobDeploymentStatusNew", "RayJob", rayJobInstance.Name) - if err = r.initRayJobStatusIfNeed(ctx, rayJobInstance); err != nil { + logger.Info("JobDeploymentStatusNew") + if err = initRayJobStatusIfNeed(ctx, rayJobInstance); err != nil { return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err } case rayv1.JobDeploymentStatusInitializing: - if shouldUpdate := r.updateStatusToSuspendingIfNeeded(ctx, rayJobInstance); shouldUpdate { + if shouldUpdate := updateStatusToSuspendingIfNeeded(ctx, rayJobInstance); shouldUpdate { break } - if shouldUpdate := r.checkActiveDeadlineAndUpdateStatusIfNeeded(ctx, rayJobInstance); shouldUpdate { + if shouldUpdate := checkActiveDeadlineAndUpdateStatusIfNeeded(ctx, rayJobInstance); shouldUpdate { break } @@ -152,8 +177,8 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) // Check the current status of RayCluster before submitting. if clientURL := rayJobInstance.Status.DashboardURL; clientURL == "" { - if rayClusterInstance.Status.State != rayv1.Ready { - logger.Info("Wait for the RayCluster.Status.State to be ready before submitting the job.", "RayCluster", rayClusterInstance.Name, "State", rayClusterInstance.Status.State) + if rayClusterInstance.Status.State != rayv1.Ready { //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + logger.Info("Wait for the RayCluster.Status.State to be ready before submitting the job.", "RayCluster", rayClusterInstance.Name, "State", rayClusterInstance.Status.State) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err } @@ -164,21 +189,35 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) rayJobInstance.Status.DashboardURL = clientURL } + if rayJobInstance.Spec.SubmissionMode == rayv1.InteractiveMode { + logger.Info("SubmissionMode is InteractiveMode and the RayCluster is created. 
Transition the status from `Initializing` to `Waiting`.") + rayJobInstance.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusWaiting + break + } + if rayJobInstance.Spec.SubmissionMode == rayv1.K8sJobMode { if err := r.createK8sJobIfNeed(ctx, rayJobInstance, rayClusterInstance); err != nil { return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err } } - logger.Info("Both RayCluster and the submitter K8s Job are created. Transition the status from `Initializing` to `Running`.", - "RayJob", rayJobInstance.Name, "RayCluster", rayJobInstance.Status.RayClusterName) + logger.Info("Both RayCluster and the submitter K8s Job are created. Transition the status from `Initializing` to `Running`.", "SubmissionMode", rayJobInstance.Spec.SubmissionMode, + "RayCluster", rayJobInstance.Status.RayClusterName) + rayJobInstance.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusRunning + case rayv1.JobDeploymentStatusWaiting: + // Try to get the Ray job id from rayJob.Spec.JobId + if rayJobInstance.Spec.JobId == "" { + return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, nil + } + + rayJobInstance.Status.JobId = rayJobInstance.Spec.JobId rayJobInstance.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusRunning case rayv1.JobDeploymentStatusRunning: - if shouldUpdate := r.updateStatusToSuspendingIfNeeded(ctx, rayJobInstance); shouldUpdate { + if shouldUpdate := updateStatusToSuspendingIfNeeded(ctx, rayJobInstance); shouldUpdate { break } - if shouldUpdate := r.checkActiveDeadlineAndUpdateStatusIfNeeded(ctx, rayJobInstance); shouldUpdate { + if shouldUpdate := checkActiveDeadlineAndUpdateStatusIfNeeded(ctx, rayJobInstance); shouldUpdate { break } @@ -191,10 +230,10 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) // mode is not stuck in the `Running` status indefinitely. 
namespacedName := common.RayJobK8sJobNamespacedName(rayJobInstance) if err := r.Client.Get(ctx, namespacedName, job); err != nil { - logger.Error(err, "Failed to get the submitter Kubernetes Job", "NamespacedName", namespacedName) + logger.Error(err, "Failed to get the submitter Kubernetes Job for RayJob", "NamespacedName", namespacedName) return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err } - if shouldUpdate := r.checkK8sJobAndUpdateStatusIfNeeded(ctx, rayJobInstance, job); shouldUpdate { + if shouldUpdate := checkK8sJobAndUpdateStatusIfNeeded(ctx, rayJobInstance, job); shouldUpdate { break } } @@ -208,7 +247,10 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) // Check the current status of ray jobs rayDashboardClient := r.dashboardClientFunc() - rayDashboardClient.InitClient(rayJobInstance.Status.DashboardURL) + if err := rayDashboardClient.InitClient(ctx, rayJobInstance.Status.DashboardURL, rayClusterInstance); err != nil { + return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err + } + jobInfo, err := rayDashboardClient.GetJobInfo(ctx, rayJobInstance.Status.JobId) if err != nil { // If the Ray job was not found, GetJobInfo returns a BadRequest error. @@ -252,7 +294,7 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) rayJobInstance.Status.JobDeploymentStatus = jobDeploymentStatus rayJobInstance.Status.Reason = reason rayJobInstance.Status.Message = jobInfo.Message - case rayv1.JobDeploymentStatusSuspending: + case rayv1.JobDeploymentStatusSuspending, rayv1.JobDeploymentStatusRetrying: // The `suspend` operation should be atomic. In other words, if users set the `suspend` flag to true and then immediately // set it back to false, either all of the RayJob's associated resources should be cleaned up, or no resources should be // cleaned up at all. 
To keep the atomicity, if a RayJob is in the `Suspending` status, we should delete all of its @@ -281,9 +323,16 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) rayJobInstance.Status.DashboardURL = "" rayJobInstance.Status.JobId = "" rayJobInstance.Status.Message = "" + rayJobInstance.Status.Reason = "" // Reset the JobStatus to JobStatusNew and transition the JobDeploymentStatus to `Suspended`. rayJobInstance.Status.JobStatus = rayv1.JobStatusNew - rayJobInstance.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusSuspended + + if rayJobInstance.Status.JobDeploymentStatus == rayv1.JobDeploymentStatusSuspending { + rayJobInstance.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusSuspended + } + if rayJobInstance.Status.JobDeploymentStatus == rayv1.JobDeploymentStatusRetrying { + rayJobInstance.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusNew + } case rayv1.JobDeploymentStatusSuspended: if !rayJobInstance.Spec.Suspend { logger.Info("The status is 'Suspended', but the suspend flag is false. Transition the status to 'New'.") @@ -295,39 +344,76 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, nil case rayv1.JobDeploymentStatusComplete, rayv1.JobDeploymentStatusFailed: // If this RayJob uses an existing RayCluster (i.e., ClusterSelector is set), we should not delete the RayCluster. 
- logger.Info(string(rayJobInstance.Status.JobDeploymentStatus), "RayJob", rayJobInstance.Name, "ShutdownAfterJobFinishes", rayJobInstance.Spec.ShutdownAfterJobFinishes, "ClusterSelector", rayJobInstance.Spec.ClusterSelector) - if rayJobInstance.Spec.ShutdownAfterJobFinishes && len(rayJobInstance.Spec.ClusterSelector) == 0 { - ttlSeconds := rayJobInstance.Spec.TTLSecondsAfterFinished - nowTime := time.Now() - shutdownTime := rayJobInstance.Status.EndTime.Add(time.Duration(ttlSeconds) * time.Second) - logger.Info( - fmt.Sprintf("RayJob is %s", rayJobInstance.Status.JobDeploymentStatus), - "shutdownAfterJobFinishes", rayJobInstance.Spec.ShutdownAfterJobFinishes, - "ttlSecondsAfterFinished", ttlSeconds, - "Status.endTime", rayJobInstance.Status.EndTime, - "Now", nowTime, - "ShutdownTime", shutdownTime) + ttlSeconds := rayJobInstance.Spec.TTLSecondsAfterFinished + nowTime := time.Now() + shutdownTime := rayJobInstance.Status.EndTime.Add(time.Duration(ttlSeconds) * time.Second) + logger.Info(string(rayJobInstance.Status.JobDeploymentStatus), + "ShutdownAfterJobFinishes", rayJobInstance.Spec.ShutdownAfterJobFinishes, + "ClusterSelector", rayJobInstance.Spec.ClusterSelector, + "ttlSecondsAfterFinished", ttlSeconds, + "Status.endTime", rayJobInstance.Status.EndTime, + "Now", nowTime, + "ShutdownTime", shutdownTime) + + if features.Enabled(features.RayJobDeletionPolicy) && + rayJobInstance.Spec.DeletionPolicy != nil && + *rayJobInstance.Spec.DeletionPolicy != rayv1.DeleteNoneDeletionPolicy && + len(rayJobInstance.Spec.ClusterSelector) == 0 { + logger.Info("Shutdown behavior is defined by the deletion policy", "deletionPolicy", rayJobInstance.Spec.DeletionPolicy) + if shutdownTime.After(nowTime) { + delta := int32(time.Until(shutdownTime.Add(2 * time.Second)).Seconds()) + logger.Info("shutdownTime not reached, requeue this RayJob for n seconds", "seconds", delta) + return ctrl.Result{RequeueAfter: time.Duration(delta) * time.Second}, nil + } + + switch 
*rayJobInstance.Spec.DeletionPolicy { + case rayv1.DeleteClusterDeletionPolicy: + logger.Info("Deleting RayCluster", "RayCluster", rayJobInstance.Status.RayClusterName) + _, err = r.deleteClusterResources(ctx, rayJobInstance) + case rayv1.DeleteWorkersDeletionPolicy: + logger.Info("Suspending all worker groups", "RayCluster", rayJobInstance.Status.RayClusterName) + err = r.suspendWorkerGroups(ctx, rayJobInstance) + case rayv1.DeleteSelfDeletionPolicy: + logger.Info("Deleting RayJob") + err = r.Client.Delete(ctx, rayJobInstance) + default: + } + if err != nil { + return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err + } + } + + if (!features.Enabled(features.RayJobDeletionPolicy) || rayJobInstance.Spec.DeletionPolicy == nil) && rayJobInstance.Spec.ShutdownAfterJobFinishes && len(rayJobInstance.Spec.ClusterSelector) == 0 { + logger.Info("Shutdown behavior is defined by the `ShutdownAfterJobFinishes` flag", "shutdownAfterJobFinishes", rayJobInstance.Spec.ShutdownAfterJobFinishes) if shutdownTime.After(nowTime) { delta := int32(time.Until(shutdownTime.Add(2 * time.Second)).Seconds()) - logger.Info(fmt.Sprintf("shutdownTime not reached, requeue this RayJob for %d seconds", delta)) + logger.Info("shutdownTime not reached, requeue this RayJob for n seconds", "seconds", delta) return ctrl.Result{RequeueAfter: time.Duration(delta) * time.Second}, nil + } + if s := os.Getenv(utils.DELETE_RAYJOB_CR_AFTER_JOB_FINISHES); strings.ToLower(s) == "true" { + err = r.Client.Delete(ctx, rayJobInstance) + logger.Info("RayJob is deleted") } else { // We only need to delete the RayCluster. We don't need to delete the submitter Kubernetes Job so that users can still access // the driver logs. In addition, a completed Kubernetes Job does not actually use any compute resources. 
- if _, err = r.deleteClusterResources(ctx, rayJobInstance); err != nil { - return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err - } + _, err = r.deleteClusterResources(ctx, rayJobInstance) + logger.Info("RayCluster is deleted", "RayCluster", rayJobInstance.Status.RayClusterName) + } + if err != nil { + return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err } } + // If the RayJob is completed, we should not requeue it. return ctrl.Result{}, nil default: logger.Info("Unknown JobDeploymentStatus", "JobDeploymentStatus", rayJobInstance.Status.JobDeploymentStatus) return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, nil } + checkBackoffLimitAndUpdateStatusIfNeeded(ctx, rayJobInstance) // This is the only place where we update the RayJob status. Please do NOT add any code - // between the above switch statement and the following code. + // between `checkBackoffLimitAndUpdateStatusIfNeeded` and the following code. if err = r.updateRayJobStatus(ctx, originalRayJobInstance, rayJobInstance); err != nil { logger.Info("Failed to update RayJob status", "error", err) return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err @@ -335,6 +421,48 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, nil } +// checkBackoffLimitAndUpdateStatusIfNeeded determines if a RayJob is eligible for retry based on the configured backoff limit, +// the job's success status, and its failure status. If eligible, sets the JobDeploymentStatus to Retrying. 
+func checkBackoffLimitAndUpdateStatusIfNeeded(ctx context.Context, rayJob *rayv1.RayJob) { + logger := ctrl.LoggerFrom(ctx) + + failedCount := int32(0) + if rayJob.Status.Failed != nil { + failedCount = *rayJob.Status.Failed + } + + succeededCount := int32(0) + if rayJob.Status.Succeeded != nil { + succeededCount = *rayJob.Status.Succeeded + } + + if rayJob.Status.JobDeploymentStatus == rayv1.JobDeploymentStatusFailed { + failedCount++ + } + + if rayJob.Status.JobStatus == rayv1.JobStatusSucceeded && rayJob.Status.JobDeploymentStatus == rayv1.JobDeploymentStatusComplete { + succeededCount++ + } + + rayJob.Status.Failed = ptr.To[int32](failedCount) + rayJob.Status.Succeeded = ptr.To[int32](succeededCount) + + if rayJob.Status.JobDeploymentStatus == rayv1.JobDeploymentStatusFailed && rayJob.Spec.BackoffLimit != nil && *rayJob.Status.Failed < *rayJob.Spec.BackoffLimit+1 { + if rayJob.Status.Reason == rayv1.DeadlineExceeded { + logger.Info( + "RayJob is not eligible for retry due to failure with DeadlineExceeded", + "backoffLimit", *rayJob.Spec.BackoffLimit, + "succeeded", *rayJob.Status.Succeeded, + "failed", *rayJob.Status.Failed, + ) + return + } + logger.Info("RayJob is eligible for retry, setting JobDeploymentStatus to Retrying", + "backoffLimit", *rayJob.Spec.BackoffLimit, "succeeded", *rayJob.Status.Succeeded, "failed", *rayJob.Status.Failed) + rayJob.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusRetrying + } +} + // createK8sJobIfNeed creates a Kubernetes Job for the RayJob if it doesn't exist. 
func (r *RayJobReconciler) createK8sJobIfNeed(ctx context.Context, rayJobInstance *rayv1.RayJob, rayClusterInstance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) @@ -342,25 +470,21 @@ func (r *RayJobReconciler) createK8sJobIfNeed(ctx context.Context, rayJobInstanc namespacedName := common.RayJobK8sJobNamespacedName(rayJobInstance) if err := r.Client.Get(ctx, namespacedName, job); err != nil { if errors.IsNotFound(err) { - submitterTemplate, err := r.getSubmitterTemplate(ctx, rayJobInstance, rayClusterInstance) + submitterTemplate, err := getSubmitterTemplate(ctx, rayJobInstance, rayClusterInstance) if err != nil { - logger.Error(err, "failed to get submitter template") return err } return r.createNewK8sJob(ctx, rayJobInstance, submitterTemplate) } - - // Some other error occurred while trying to get the Job - logger.Error(err, "failed to get Kubernetes Job") return err } - logger.Info("Kubernetes Job already exists", "RayJob", rayJobInstance.Name, "Kubernetes Job", job.Name) + logger.Info("The submitter Kubernetes Job for RayJob already exists", "Kubernetes Job", job.Name) return nil } // getSubmitterTemplate builds the submitter pod template for the Ray job. 
-func (r *RayJobReconciler) getSubmitterTemplate(ctx context.Context, rayJobInstance *rayv1.RayJob, rayClusterInstance *rayv1.RayCluster) (corev1.PodTemplateSpec, error) { +func getSubmitterTemplate(ctx context.Context, rayJobInstance *rayv1.RayJob, rayClusterInstance *rayv1.RayCluster) (corev1.PodTemplateSpec, error) { logger := ctrl.LoggerFrom(ctx) var submitterTemplate corev1.PodTemplateSpec @@ -379,7 +503,8 @@ func (r *RayJobReconciler) getSubmitterTemplate(ctx context.Context, rayJobInsta if err != nil { return corev1.PodTemplateSpec{}, err } - submitterTemplate.Spec.Containers[utils.RayContainerIndex].Command = k8sJobCommand + submitterTemplate.Spec.Containers[utils.RayContainerIndex].Command = []string{"/bin/sh"} + submitterTemplate.Spec.Containers[utils.RayContainerIndex].Args = []string{"-c", strings.Join(k8sJobCommand, " ")} logger.Info("No command is specified in the user-provided template. Default command is used", "command", k8sJobCommand) } else { logger.Info("User-provided command is used", "command", submitterTemplate.Spec.Containers[utils.RayContainerIndex].Command) @@ -409,6 +534,10 @@ func (r *RayJobReconciler) getSubmitterTemplate(ctx context.Context, rayJobInsta // createNewK8sJob creates a new Kubernetes Job. It returns an error. 
func (r *RayJobReconciler) createNewK8sJob(ctx context.Context, rayJobInstance *rayv1.RayJob, submitterTemplate corev1.PodTemplateSpec) error { logger := ctrl.LoggerFrom(ctx) + submitterBackoffLimit := ptr.To[int32](2) + if rayJobInstance.Spec.SubmitterConfig != nil && rayJobInstance.Spec.SubmitterConfig.BackoffLimit != nil { + submitterBackoffLimit = rayJobInstance.Spec.SubmitterConfig.BackoffLimit + } job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: rayJobInstance.Name, @@ -424,24 +553,24 @@ func (r *RayJobReconciler) createNewK8sJob(ctx context.Context, rayJobInstance * // is attempted 3 times at the maximum, but still mitigates the case of unrecoverable // application-level errors, where the maximum number of retries is reached, and the job // completion time increases with no benefits, but wasted resource cycles. - BackoffLimit: pointer.Int32(2), + BackoffLimit: submitterBackoffLimit, Template: submitterTemplate, }, } // Set the ownership in order to do the garbage collection by k8s. 
if err := ctrl.SetControllerReference(rayJobInstance, job, r.Scheme); err != nil { - logger.Error(err, "failed to set controller reference") return err } // Create the Kubernetes Job if err := r.Client.Create(ctx, job); err != nil { - logger.Error(err, "failed to create k8s Job") + logger.Error(err, "Failed to create new submitter Kubernetes Job for RayJob") + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeWarning, string(utils.FailedToCreateRayJobSubmitter), "Failed to create new Kubernetes Job %s/%s: %v", job.Namespace, job.Name, err) return err } - logger.Info("Kubernetes Job created", "RayJob", rayJobInstance.Name, "Kubernetes Job", job.Name) - r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, "Created", "Created Kubernetes Job %s", job.Name) + logger.Info("Created submitter Kubernetes Job for RayJob", "Kubernetes Job", job.Name) + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, string(utils.CreatedRayJobSubmitter), "Created Kubernetes Job %s/%s", job.Namespace, job.Name) return nil } @@ -461,20 +590,20 @@ func (r *RayJobReconciler) deleteSubmitterJob(ctx context.Context, rayJobInstanc if err := r.Client.Get(ctx, namespacedName, job); err != nil { if errors.IsNotFound(err) { isJobDeleted = true - logger.Info("The submitter Kubernetes Job has been already deleted", "RayJob", rayJobInstance.Name, "Kubernetes Job", job.Name) + logger.Info("The submitter Kubernetes Job has been already deleted", "Kubernetes Job", job.Name) } else { - logger.Error(err, "Failed to get Kubernetes Job") return false, err } } else { if !job.DeletionTimestamp.IsZero() { - logger.Info("The Job deletion is ongoing.", "RayJob", rayJobInstance.Name, "Submitter K8s Job", job.Name) + logger.Info("The deletion of submitter Kubernetes Job for RayJob is ongoing.", "Submitter K8s Job", job.Name) } else { if err := r.Client.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil { + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeWarning, 
string(utils.FailedToDeleteRayJobSubmitter), "Failed to delete submitter K8s Job %s/%s: %v", job.Namespace, job.Name, err) return false, err } - logger.Info("The associated submitter Job is deleted", "RayJob", rayJobInstance.Name, "Submitter K8s Job", job.Name) - r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, "Deleted", "Deleted submitter K8s Job %s", job.Name) + logger.Info("The associated submitter Kubernetes Job for RayJob is deleted", "Submitter K8s Job", job.Name) + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, string(utils.DeletedRayJobSubmitter), "Deleted submitter K8s Job %s/%s", job.Namespace, job.Name) } } @@ -494,19 +623,20 @@ func (r *RayJobReconciler) deleteClusterResources(ctx context.Context, rayJobIns // If the cluster is not found, it means the cluster has been already deleted. // Don't return error to make this function idempotent. isClusterDeleted = true - logger.Info("The associated cluster has been already deleted and it can not be found", "RayCluster", clusterIdentifier) + logger.Info("The associated RayCluster for RayJob has been already deleted and it can not be found", "RayCluster", clusterIdentifier) } else { return false, err } } else { if !cluster.DeletionTimestamp.IsZero() { - logger.Info("The cluster deletion is ongoing.", "rayjob", rayJobInstance.Name, "raycluster", cluster.Name) + logger.Info("The deletion of the associated RayCluster for RayJob is ongoing.", "RayCluster", cluster.Name) } else { if err := r.Delete(ctx, &cluster); err != nil { + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeWarning, string(utils.FailedToDeleteRayCluster), "Failed to delete cluster %s/%s: %v", cluster.Namespace, cluster.Name, err) return false, err } - logger.Info("The associated cluster is deleted", "RayCluster", clusterIdentifier) - r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, "Deleted", "Deleted cluster %s", rayJobInstance.Status.RayClusterName) + logger.Info("The associated RayCluster for RayJob is deleted", 
"RayCluster", clusterIdentifier) + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, string(utils.DeletedRayCluster), "Deleted cluster %s/%s", cluster.Namespace, cluster.Name) } } @@ -514,14 +644,42 @@ func (r *RayJobReconciler) deleteClusterResources(ctx context.Context, rayJobIns return isClusterDeleted, nil } +func (r *RayJobReconciler) suspendWorkerGroups(ctx context.Context, rayJobInstance *rayv1.RayJob) error { + logger := ctrl.LoggerFrom(ctx) + clusterIdentifier := common.RayJobRayClusterNamespacedName(rayJobInstance) + + cluster := rayv1.RayCluster{} + if err := r.Get(ctx, clusterIdentifier, &cluster); err != nil { + return err + } + + for i := range cluster.Spec.WorkerGroupSpecs { + cluster.Spec.WorkerGroupSpecs[i].Suspend = ptr.To[bool](true) + } + + if err := r.Update(ctx, &cluster); err != nil { + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeWarning, + string(utils.FailedToUpdateRayCluster), + "Failed to suspend worker groups in cluster %s/%s: %v", + cluster.Namespace, cluster.Name, err) + return err + } + + logger.Info("All worker groups for RayCluster have had `suspend` set to true", "RayCluster", clusterIdentifier) + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, string(utils.UpdatedRayCluster), "Set the `suspend` field to true for all worker groups in cluster %s/%s", cluster.Namespace, cluster.Name) + + return nil +} + // SetupWithManager sets up the controller with the Manager. -func (r *RayJobReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *RayJobReconciler) SetupWithManager(mgr ctrl.Manager, reconcileConcurrency int) error { return ctrl.NewControllerManagedBy(mgr). For(&rayv1.RayJob{}). Owns(&rayv1.RayCluster{}). Owns(&corev1.Service{}). Owns(&batchv1.Job{}). 
WithOptions(controller.Options{ + MaxConcurrentReconciles: reconcileConcurrency, LogConstructor: func(request *reconcile.Request) logr.Logger { logger := ctrl.Log.WithName("controllers").WithName("RayJob") if request != nil { @@ -536,16 +694,17 @@ func (r *RayJobReconciler) SetupWithManager(mgr ctrl.Manager) error { // This function is the sole place where `JobDeploymentStatusInitializing` is defined. It initializes `Status.JobId` and `Status.RayClusterName` // prior to job submissions and RayCluster creations. This is used to avoid duplicate job submissions and cluster creations. In addition, this // function also sets `Status.StartTime` to support `ActiveDeadlineSeconds`. -func (r *RayJobReconciler) initRayJobStatusIfNeed(ctx context.Context, rayJob *rayv1.RayJob) error { +// This function will set or generate JobId if SubmissionMode is not InteractiveMode. +func initRayJobStatusIfNeed(ctx context.Context, rayJob *rayv1.RayJob) error { logger := ctrl.LoggerFrom(ctx) shouldUpdateStatus := rayJob.Status.JobId == "" || rayJob.Status.RayClusterName == "" || rayJob.Status.JobStatus == "" // Please don't update `shouldUpdateStatus` below. 
- logger.Info("initRayJobStatusIfNeed", "shouldUpdateStatus", shouldUpdateStatus, "RayJob", rayJob.Name, "jobId", rayJob.Status.JobId, "rayClusterName", rayJob.Status.RayClusterName, "jobStatus", rayJob.Status.JobStatus) + logger.Info("initRayJobStatusIfNeed", "shouldUpdateStatus", shouldUpdateStatus, "jobId", rayJob.Status.JobId, "rayClusterName", rayJob.Status.RayClusterName, "jobStatus", rayJob.Status.JobStatus) if !shouldUpdateStatus { return nil } - if rayJob.Status.JobId == "" { + if rayJob.Spec.SubmissionMode != rayv1.InteractiveMode && rayJob.Status.JobId == "" { if rayJob.Spec.JobId != "" { rayJob.Status.JobId = rayJob.Spec.JobId } else { @@ -607,34 +766,32 @@ func (r *RayJobReconciler) getOrCreateRayClusterInstance(ctx context.Context, ra rayClusterInstance := &rayv1.RayCluster{} if err := r.Get(ctx, rayClusterNamespacedName, rayClusterInstance); err != nil { if errors.IsNotFound(err) { - logger.Info("RayCluster not found", "RayJob", rayJobInstance.Name, "RayCluster", rayClusterNamespacedName) + logger.Info("RayCluster not found", "RayCluster", rayClusterNamespacedName) if len(rayJobInstance.Spec.ClusterSelector) != 0 { - err := fmt.Errorf("we have choosed the cluster selector mode, failed to find the cluster named %v, err: %v", rayClusterNamespacedName.Name, err) + err := fmt.Errorf("we have choosed the cluster selector mode, failed to find the cluster named %v, err: %w", rayClusterNamespacedName.Name, err) return nil, err } logger.Info("RayCluster not found, creating RayCluster!", "RayCluster", rayClusterNamespacedName) rayClusterInstance, err = r.constructRayClusterForRayJob(rayJobInstance, rayClusterNamespacedName.Name) if err != nil { - logger.Error(err, "unable to construct a new RayCluster") return nil, err } if err := r.Create(ctx, rayClusterInstance); err != nil { - logger.Error(err, "unable to create RayCluster for RayJob", "RayCluster", rayClusterInstance) + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeWarning, 
string(utils.FailedToCreateRayCluster), "Failed to create RayCluster %s/%s: %v", rayClusterInstance.Namespace, rayClusterInstance.Name, err) return nil, err } - r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, "Created", "Created RayCluster %s", rayJobInstance.Status.RayClusterName) + r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, string(utils.CreatedRayCluster), "Created RayCluster %s/%s", rayClusterInstance.Namespace, rayClusterInstance.Name) } else { - logger.Error(err, "Fail to get RayCluster!") return nil, err } } - logger.Info("Found associated RayCluster for RayJob", "RayJob", rayJobInstance.Name, "RayCluster", rayClusterNamespacedName) + logger.Info("Found the associated RayCluster for RayJob", "RayCluster", rayClusterNamespacedName) // Verify that RayJob is not in cluster selector mode first to avoid nil pointer dereference error during spec comparison. // This is checked by ensuring len(rayJobInstance.Spec.ClusterSelector) equals 0. if len(rayJobInstance.Spec.ClusterSelector) == 0 && !utils.CompareJsonStruct(rayClusterInstance.Spec, *rayJobInstance.Spec.RayClusterSpec) { - logger.Info("Disregard changes in RayClusterSpec of RayJob", "RayJob", rayJobInstance.Name) + logger.Info("Disregard changes in RayClusterSpec of RayJob") } return rayClusterInstance, nil @@ -665,7 +822,7 @@ func (r *RayJobReconciler) constructRayClusterForRayJob(rayJobInstance *rayv1.Ra return rayCluster, nil } -func (r *RayJobReconciler) updateStatusToSuspendingIfNeeded(ctx context.Context, rayJob *rayv1.RayJob) bool { +func updateStatusToSuspendingIfNeeded(ctx context.Context, rayJob *rayv1.RayJob) bool { logger := ctrl.LoggerFrom(ctx) if !rayJob.Spec.Suspend { return false @@ -676,19 +833,19 @@ func (r *RayJobReconciler) updateStatusToSuspendingIfNeeded(ctx context.Context, rayv1.JobDeploymentStatusInitializing: {}, } if _, ok := validTransitions[rayJob.Status.JobDeploymentStatus]; !ok { - logger.Info("The current status is not allowed to transition to 
`Suspending`", "RayJob", rayJob.Name, "JobDeploymentStatus", rayJob.Status.JobDeploymentStatus) + logger.Info("The current status is not allowed to transition to `Suspending`", "JobDeploymentStatus", rayJob.Status.JobDeploymentStatus) return false } - logger.Info(fmt.Sprintf("Try to transition the status from `%s` to `Suspending`", rayJob.Status.JobDeploymentStatus), "RayJob", rayJob.Name) + logger.Info("Try to transition the status to `Suspending`", "oldStatus", rayJob.Status.JobDeploymentStatus) rayJob.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusSuspending return true } -func (r *RayJobReconciler) checkK8sJobAndUpdateStatusIfNeeded(ctx context.Context, rayJob *rayv1.RayJob, job *batchv1.Job) bool { +func checkK8sJobAndUpdateStatusIfNeeded(ctx context.Context, rayJob *rayv1.RayJob, job *batchv1.Job) bool { logger := ctrl.LoggerFrom(ctx) for _, cond := range job.Status.Conditions { if cond.Type == batchv1.JobFailed && cond.Status == corev1.ConditionTrue { - logger.Info("The submitter Kubernetes Job has failed. Attempting to transition the status to `Failed`.", "RayJob", rayJob.Name, "Submitter K8s Job", job.Name, "Reason", cond.Reason, "Message", cond.Message) + logger.Info("The submitter Kubernetes Job has failed. Attempting to transition the status to `Failed`.", "Submitter K8s Job", job.Name, "Reason", cond.Reason, "Message", cond.Message) rayJob.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusFailed // The submitter Job needs to wait for the user code to finish and retrieve its logs. // Therefore, a failed Submitter Job indicates that the submission itself has failed or the user code has thrown an error. 
@@ -705,11 +862,12 @@ func (r *RayJobReconciler) checkK8sJobAndUpdateStatusIfNeeded(ctx context.Contex return false } -func (r *RayJobReconciler) checkActiveDeadlineAndUpdateStatusIfNeeded(ctx context.Context, rayJob *rayv1.RayJob) bool { +func checkActiveDeadlineAndUpdateStatusIfNeeded(ctx context.Context, rayJob *rayv1.RayJob) bool { logger := ctrl.LoggerFrom(ctx) if rayJob.Spec.ActiveDeadlineSeconds == nil || time.Now().Before(rayJob.Status.StartTime.Add(time.Duration(*rayJob.Spec.ActiveDeadlineSeconds)*time.Second)) { return false } + logger.Info("The RayJob has passed the activeDeadlineSeconds. Transition the status to `Failed`.", "StartTime", rayJob.Status.StartTime, "ActiveDeadlineSeconds", *rayJob.Spec.ActiveDeadlineSeconds) rayJob.Status.JobDeploymentStatus = rayv1.JobDeploymentStatusFailed rayJob.Status.Reason = rayv1.DeadlineExceeded @@ -724,10 +882,12 @@ func validateRayJobSpec(rayJob *rayv1.RayJob) error { if rayJob.Spec.Suspend && !rayJob.Spec.ShutdownAfterJobFinishes { return fmt.Errorf("a RayJob with shutdownAfterJobFinishes set to false is not allowed to be suspended") } - if rayJob.Spec.Suspend && len(rayJob.Spec.ClusterSelector) != 0 { + + isClusterSelectorMode := len(rayJob.Spec.ClusterSelector) != 0 + if rayJob.Spec.Suspend && isClusterSelectorMode { return fmt.Errorf("the ClusterSelector mode doesn't support the suspend operation") } - if rayJob.Spec.RayClusterSpec == nil && len(rayJob.Spec.ClusterSelector) == 0 { + if rayJob.Spec.RayClusterSpec == nil && !isClusterSelectorMode { return fmt.Errorf("one of RayClusterSpec or ClusterSelector must be set") } // Validate whether RuntimeEnvYAML is a valid YAML string. 
Note that this only checks its validity @@ -738,5 +898,40 @@ func validateRayJobSpec(rayJob *rayv1.RayJob) error { if rayJob.Spec.ActiveDeadlineSeconds != nil && *rayJob.Spec.ActiveDeadlineSeconds <= 0 { return fmt.Errorf("activeDeadlineSeconds must be a positive integer") } + if rayJob.Spec.BackoffLimit != nil && *rayJob.Spec.BackoffLimit < 0 { + return fmt.Errorf("backoffLimit must be a positive integer") + } + if !features.Enabled(features.RayJobDeletionPolicy) && rayJob.Spec.DeletionPolicy != nil { + return fmt.Errorf("RayJobDeletionPolicy feature gate must be enabled to use the DeletionPolicy feature") + } + + if rayJob.Spec.DeletionPolicy != nil { + policy := *rayJob.Spec.DeletionPolicy + if isClusterSelectorMode { + switch policy { + case rayv1.DeleteClusterDeletionPolicy: + return fmt.Errorf("the ClusterSelector mode doesn't support DeletionPolicy=DeleteCluster") + case rayv1.DeleteWorkersDeletionPolicy: + return fmt.Errorf("the ClusterSelector mode doesn't support DeletionPolicy=DeleteWorkers") + } + } + + if policy == rayv1.DeleteWorkersDeletionPolicy && utils.IsAutoscalingEnabled(rayJob) { + // TODO (rueian): This can be supported in a future Ray version. We should check the RayVersion once we know it. 
+ return fmt.Errorf("DeletionPolicy=DeleteWorkers currently does not support RayCluster with autoscaling enabled") + } + + if rayJob.Spec.ShutdownAfterJobFinishes && policy == rayv1.DeleteNoneDeletionPolicy { + return fmt.Errorf("shutdownAfterJobFinshes is set to 'true' while deletion policy is 'DeleteNone'") + } + } + return nil +} + +func validateRayJobStatus(rayJob *rayv1.RayJob) error { + if rayJob.Status.JobDeploymentStatus == rayv1.JobDeploymentStatusWaiting && rayJob.Spec.SubmissionMode != rayv1.InteractiveMode { + return fmt.Errorf("invalid RayJob State: JobDeploymentStatus cannot be `Waiting` when SubmissionMode is not InteractiveMode") + } + return nil } diff --git a/ray-operator/controllers/ray/rayjob_controller_suspended_test.go b/ray-operator/controllers/ray/rayjob_controller_suspended_test.go index b1d076170ce..0c49efd68cd 100644 --- a/ray-operator/controllers/ray/rayjob_controller_suspended_test.go +++ b/ray-operator/controllers/ray/rayjob_controller_suspended_test.go @@ -17,242 +17,178 @@ package ray import ( "context" - "fmt" "time" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" - - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" - . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - - corev1 "k8s.io/api/core/v1" - "k8s.io/utils/pointer" - batchv1 "k8s.io/api/batch/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/util/retry" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" ) -var _ = Context("Inside the default namespace", func() { - ctx := context.TODO() - var workerPods corev1.PodList - var headPods corev1.PodList - - mySuspendedRayCluster := &rayv1.RayCluster{} +var _ = Context("RayJob with suspend operation", func() { + Describe("When creating a rayjob with suspend == true", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayCluster := &rayv1.RayCluster{} + rayJob := rayJobTemplate("rayjob-suspend", namespace) + rayJob.Spec.Suspend = true - mySuspendedRayJob := &rayv1.RayJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rayjob-test-suspend", - Namespace: "default", - }, - Spec: rayv1.RayJobSpec{ - ShutdownAfterJobFinishes: true, - Suspend: true, - Entrypoint: "sleep 999", - RayClusterSpec: &rayv1.RayClusterSpec{ - RayVersion: "2.9.0", - HeadGroupSpec: rayv1.HeadGroupSpec{ - RayStartParams: map[string]string{}, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "ray-head", - Image: "rayproject/ray:2.8.0", - Resources: corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("1"), - corev1.ResourceMemory: resource.MustParse("2Gi"), - }, - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("1"), - corev1.ResourceMemory: resource.MustParse("2Gi"), - }, - }, - Ports: []corev1.ContainerPort{ - { - Name: "gcs-server", - ContainerPort: 6379, - }, - { - Name: "dashboard", - ContainerPort: 8265, - }, - { - Name: "head", - 
ContainerPort: 10001, - }, - { - Name: "dashboard-agent", - ContainerPort: 52365, - }, - }, - }, - }, - }, - }, - }, - WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ - { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), - GroupName: "small-group", - RayStartParams: map[string]string{}, - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "ray-worker", - Image: "rayproject/ray:2.8.0", - }, - }, - }, - }, - }, - }, - }, - }, - } - - Describe("When creating a rayjob with suspend == true", func() { It("should create a rayjob object", func() { - err := k8sClient.Create(ctx, mySuspendedRayJob) + err := k8sClient.Create(ctx, rayJob) Expect(err).NotTo(HaveOccurred(), "failed to create test RayJob resource") }) - It("should see a rayjob object", func() { - Eventually( - getResourceFunc(ctx, client.ObjectKey{Name: mySuspendedRayJob.Name, Namespace: "default"}, mySuspendedRayJob), - time.Second*3, time.Millisecond*500).Should(BeNil(), "My myRayJob = %v", mySuspendedRayJob.Name) - }) - It("should have deployment status suspended", func() { Eventually( - getRayJobDeploymentStatus(ctx, mySuspendedRayJob), + getRayJobDeploymentStatus(ctx, rayJob), time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusSuspended)) }) It("should NOT create a raycluster object", func() { Consistently( - getRayClusterNameForRayJob(ctx, mySuspendedRayJob), + getRayClusterNameForRayJob(ctx, rayJob), time.Second*3, time.Millisecond*500).Should(BeEmpty()) }) It("should unsuspend a rayjob object", func() { - mySuspendedRayJob.Spec.Suspend = false - err := k8sClient.Update(ctx, mySuspendedRayJob) - Expect(err).NotTo(HaveOccurred(), "failed to update test RayJob resource") + err := updateRayJobSuspendField(ctx, rayJob, false) + Expect(err).NotTo(HaveOccurred(), "failed to update RayJob") }) It("should create a raycluster object", func() { // Ray Cluster name can be present on RayJob's CRD 
Eventually( - getRayClusterNameForRayJob(ctx, mySuspendedRayJob), + getRayClusterNameForRayJob(ctx, rayJob), time.Second*15, time.Millisecond*500).Should(Not(BeEmpty())) // The actual cluster instance and underlying resources SHOULD be created when suspend == false Eventually( // k8sClient client does not throw error if cluster IS found - getResourceFunc(ctx, client.ObjectKey{Name: mySuspendedRayJob.Status.RayClusterName, Namespace: "default"}, mySuspendedRayCluster), + getResourceFunc(ctx, common.RayJobRayClusterNamespacedName(rayJob), rayCluster), time.Second*3, time.Millisecond*500).Should(BeNil()) }) - It("should create 3 workers", func() { - Eventually( - listResourceFunc(ctx, &workerPods, client.MatchingLabels{ - utils.RayClusterLabelKey: mySuspendedRayCluster.Name, - utils.RayNodeGroupLabelKey: "small-group", + It("should NOT create the underlying K8s job yet because the cluster is not ready", func() { + underlyingK8sJob := &batchv1.Job{} + Consistently( + // k8sClient client throws error if resource not found + func() bool { + err := getResourceFunc(ctx, common.RayJobK8sJobNamespacedName(rayJob), underlyingK8sJob)() + return errors.IsNotFound(err) }, - &client.ListOptions{Namespace: "default"}), - time.Second*15, time.Millisecond*500).Should(Equal(3), fmt.Sprintf("workerGroup %v", workerPods.Items)) - if len(workerPods.Items) > 0 { - Expect(workerPods.Items[0].Status.Phase).Should(Or(Equal(corev1.PodRunning), Equal(corev1.PodPending))) - } + time.Second*3, time.Millisecond*500).Should(BeTrue()) }) - It("should create a head pod resource", func() { - err := k8sClient.List(ctx, &headPods, - client.MatchingLabels{ - utils.RayClusterLabelKey: mySuspendedRayCluster.Name, - utils.RayNodeGroupLabelKey: utils.RayNodeHeadGroupLabelValue, - }, - &client.ListOptions{Namespace: "default"}, - client.InNamespace(mySuspendedRayCluster.Namespace)) - - Expect(err).NotTo(HaveOccurred(), "failed list head pods") - Expect(len(headPods.Items)).Should(BeNumerically("==", 1), "My 
head pod list= %v", headPods.Items) + It("should be able to update all Pods to Running", func() { + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + }) - pod := &corev1.Pod{} - if len(headPods.Items) > 0 { - pod = &headPods.Items[0] - } + It("Dashboard URL should be set", func() { Eventually( - getResourceFunc(ctx, client.ObjectKey{Name: pod.Name, Namespace: "default"}, pod), - time.Second*3, time.Millisecond*500).Should(BeNil(), "My head pod = %v", pod) - Expect(pod.Status.Phase).Should(Or(Equal(corev1.PodPending))) + getDashboardURLForRayJob(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(HavePrefix(rayJob.Name), "Dashboard URL = %v", rayJob.Status.DashboardURL) }) - It("should NOT create the underlying K8s job yet because the cluster is not ready", func() { + It("should create the underlying Kubernetes Job object", func() { underlyingK8sJob := &batchv1.Job{} + // The underlying Kubernetes Job should be created when the RayJob is created Eventually( - // k8sClient client throws error if resource not found - func() bool { - err := getResourceFunc(ctx, client.ObjectKey{Name: mySuspendedRayJob.Name, Namespace: "default"}, underlyingK8sJob)() - return errors.IsNotFound(err) - }, - time.Second*10, time.Millisecond*500).Should(BeTrue()) + getResourceFunc(ctx, common.RayJobK8sJobNamespacedName(rayJob), underlyingK8sJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Expected Kubernetes job to be present") }) + }) - It("should be able to update all Pods to Running", func() { - // We need to manually update Pod statuses otherwise they'll always be Pending. - // envtest doesn't create a full K8s cluster. It's only the control plane. - // There's no container runtime or any other K8s controllers. - // So Pods are created, but no controller updates them from Pending to Running. 
- // See https://book.kubebuilder.io/reference/envtest.html + Describe("RayJob suspend operation shoud be atomic", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayJob := rayJobTemplate("rayjob-atomic-suspend", namespace) + rayCluster := &rayv1.RayCluster{} - for _, headPod := range headPods.Items { - headPod.Status.Phase = corev1.PodRunning - Expect(k8sClient.Status().Update(ctx, &headPod)).Should(BeNil()) - } + It("Create a RayJob custom resource", func() { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob") + }) + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { Eventually( - isAllPodsRunning(ctx, headPods, client.MatchingLabels{ - utils.RayClusterLabelKey: mySuspendedRayCluster.Name, - utils.RayNodeGroupLabelKey: utils.RayNodeHeadGroupLabelValue, - }, "default"), - time.Second*15, time.Millisecond*500).Should(Equal(true), "Head Pod should be running.") + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) - for _, workerPod := range workerPods.Items { - workerPod.Status.Phase = corev1.PodRunning - Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(BeNil()) - } + It("Make RayCluster.Status.State to be rayv1.Ready", func() { + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + Eventually( + getClusterState(ctx, namespace, rayJob.Status.RayClusterName), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() { Eventually( - isAllPodsRunning(ctx, workerPods, client.MatchingLabels{utils.RayClusterLabelKey: mySuspendedRayCluster.Name, utils.RayNodeGroupLabelKey: "small-group"}, "default"), - 
time.Second*15, time.Millisecond*500).Should(Equal(true), "All worker Pods should be running.") + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) }) - It("Dashboard URL should be set", func() { + // The finalizer here is used to prevent the RayCluster from being deleted, + // ensuring that the RayJob remains in Suspending status once the suspend field is set to true. + It("Add finalizer to the RayCluster", func() { + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := k8sClient.Get(ctx, common.RayJobRayClusterNamespacedName(rayJob), rayCluster) + if err != nil { + return err + } + rayCluster.Finalizers = append(rayCluster.Finalizers, "ray.io/deletion-blocker") + return k8sClient.Update(ctx, rayCluster) + }) + Expect(err).NotTo(HaveOccurred(), "failed to add finalizer to RayCluster") + }) + + It("Suspend the RayJob", func() { + err := updateRayJobSuspendField(ctx, rayJob, true) + Expect(err).NotTo(HaveOccurred(), "failed to update RayJob") + }) + + It("RayJobs's JobDeploymentStatus transitions from Running to Suspending.", func() { Eventually( - getDashboardURLForRayJob(ctx, mySuspendedRayJob), - time.Second*3, time.Millisecond*500).Should(HavePrefix(mySuspendedRayJob.Name), "Dashboard URL = %v", mySuspendedRayJob.Status.DashboardURL) + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusSuspending), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) }) - It("should create the underlying Kubernetes Job object", func() { - underlyingK8sJob := &batchv1.Job{} - // The underlying Kubernetes Job should be created when the RayJob is created + // The suspend operation is atomic; regardless of how the user sets the suspend field at this moment, the status should be Suspending. 
+ It("Change the suspend field of RayJob from true to false and then back to true.", func() { + err := updateRayJobSuspendField(ctx, rayJob, false) + Expect(err).NotTo(HaveOccurred(), "failed to update RayJob") + Consistently( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusSuspending), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + err = updateRayJobSuspendField(ctx, rayJob, true) + Expect(err).NotTo(HaveOccurred(), "failed to update RayJob") + Consistently( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusSuspending), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) + + It("Remove finalizer from the RayCluster", func() { + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := k8sClient.Get(ctx, common.RayJobRayClusterNamespacedName(rayJob), rayCluster) + if err != nil { + return err + } + rayCluster.Finalizers = []string{} + return k8sClient.Update(ctx, rayCluster) + }) + Expect(err).NotTo(HaveOccurred(), "failed to remove finalizer from RayCluster") + }) + + It("RayJobs's JobDeploymentStatus transitions from Suspending to Suspended.", func() { Eventually( - // k8sClient does not throw error if Job is found - func() error { - return getResourceFunc(ctx, client.ObjectKey{Name: mySuspendedRayJob.Name, Namespace: "default"}, underlyingK8sJob)() - }, - time.Second*15, time.Millisecond*500).Should(BeNil(), "Expected Kubernetes job to be present") + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusSuspended), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) }) }) }) diff --git a/ray-operator/controllers/ray/rayjob_controller_test.go b/ray-operator/controllers/ray/rayjob_controller_test.go index dfea31e9ec2..286c0ff476d 100644 --- a/ray-operator/controllers/ray/rayjob_controller_test.go +++ 
b/ray-operator/controllers/ray/rayjob_controller_test.go @@ -17,10 +17,11 @@ package ray import ( "context" - "fmt" + "os" "time" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/utils/ptr" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -29,10 +30,11 @@ import ( rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + "github.com/ray-project/kuberay/ray-operator/pkg/features" + "github.com/ray-project/kuberay/ray-operator/test/support" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/utils/pointer" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -50,7 +52,7 @@ func rayJobTemplate(name string, namespace string) *rayv1.RayJob { SubmissionMode: rayv1.K8sJobMode, ShutdownAfterJobFinishes: true, RayClusterSpec: &rayv1.RayClusterSpec{ - RayVersion: "2.9.0", + RayVersion: support.GetRayVersion(), HeadGroupSpec: rayv1.HeadGroupSpec{ RayStartParams: map[string]string{}, Template: corev1.PodTemplateSpec{ @@ -58,7 +60,7 @@ func rayJobTemplate(name string, namespace string) *rayv1.RayJob { Containers: []corev1.Container{ { Name: "ray-head", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), Resources: corev1.ResourceRequirements{ Limits: corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("1"), @@ -90,9 +92,9 @@ func rayJobTemplate(name string, namespace string) *rayv1.RayJob { }, WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), GroupName: "small-group", RayStartParams: map[string]string{}, Template: corev1.PodTemplateSpec{ @@ -100,7 +102,7 @@ func rayJobTemplate(name string, namespace string) *rayv1.RayJob { Containers: []corev1.Container{ { 
Name: "ray-worker", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), }, }, }, @@ -112,23 +114,744 @@ func rayJobTemplate(name string, namespace string) *rayv1.RayJob { } } -var _ = Context("RayJob in K8sJobMode", func() { - Describe("Successful RayJob in K8sJobMode", func() { +var _ = Context("RayJob with different submission modes", func() { + Context("RayJob in K8sJobMode", func() { + Describe("RayJob SubmitterConfig BackoffLimit", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayJobWithDefaultSubmitterConfigBackoffLimit := rayJobTemplate("rayjob-default", namespace) + rayJobWithNonDefaultSubmitterConfigBackoffLimit := rayJobTemplate("rayjob-non-default", namespace) + rayJobWithNonDefaultSubmitterConfigBackoffLimit.Spec.SubmitterConfig = &rayv1.SubmitterConfig{ + BackoffLimit: ptr.To[int32](88), + } + rayJobs := make(map[*rayv1.RayJob]int32) + rayJobs[rayJobWithDefaultSubmitterConfigBackoffLimit] = int32(2) + rayJobs[rayJobWithNonDefaultSubmitterConfigBackoffLimit] = int32(88) + + It("Verify RayJob spec", func() { + for rayJob := range rayJobs { + // Make sure the submission mode is K8sJobMode. 
+ Expect(rayJob.Spec.SubmissionMode).To(Equal(rayv1.K8sJobMode)) + } + }) + + It("Create RayJob custom resources", func() { + for rayJob := range rayJobs { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob: %v", rayJob.Name) + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name) + } + }) + + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { + for rayJob := range rayJobs { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + } + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() { + for rayJob := range rayJobs { + rayCluster := &rayv1.RayCluster{} + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Make RayCluster.Status.State to be rayv1.Ready. + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. + Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + + // RayJobs's JobDeploymentStatus transitions to Running. 
+ Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + } + }) + + It("Verify K8s Job BackoffLimit", func() { + for rayJob, backoffLimit := range rayJobs { + // In Running state, the submitter Kubernetes Job must be created if this RayJob is in K8sJobMode. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + Expect(*(job.Spec.BackoffLimit)).To(Equal(backoffLimit)) + } + }) + }) + + Describe("Successful RayJob in K8sJobMode", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayJob := rayJobTemplate("rayjob-test", namespace) + rayCluster := &rayv1.RayCluster{} + + It("Verify RayJob spec", func() { + // This test case simulates the most common scenario in the RayJob code path. + // (1) The submission mode is K8sJobMode. + // (2) `shutdownAfterJobFinishes` is true. + // In this test, RayJob passes through the following states: New -> Initializing -> Running -> Complete + Expect(rayJob.Spec.SubmissionMode).To(Equal(rayv1.K8sJobMode)) + Expect(rayJob.Spec.ShutdownAfterJobFinishes).To(BeTrue()) + + // This test assumes that there is only one worker group. 
+ Expect(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs).To(HaveLen(1)) + }) + + It("Create a RayJob custom resource", func() { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name) + }) + + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Initializing state, Status.RayClusterName, Status.JobId, and Status.StartTime must be set. + Expect(rayJob.Status.RayClusterName).NotTo(BeEmpty()) + Expect(rayJob.Status.JobId).NotTo(BeEmpty()) + Expect(rayJob.Status.StartTime).NotTo(BeNil()) + }) + + It("In Initializing state, the RayCluster should eventually be created.", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Check whether RayCluster is consistent with RayJob's RayClusterSpec. + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs[0].Replicas)) + Expect(rayCluster.Spec.RayVersion).To(Equal(rayJob.Spec.RayClusterSpec.RayVersion)) + + // TODO (kevin85421): Check the RayCluster labels and annotations. 
+ Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRNameLabelKey, rayJob.Name)) + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRDLabelKey, utils.RayOriginatedFromCRDLabelValue(utils.RayJobCRD))) + }) + + It("Make RayCluster.Status.State to be rayv1.Ready", func() { + // The RayCluster is not 'Ready' yet because Pods are not running and ready. + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. + Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Running state, the RayJob's Status.DashboardURL must be set. + Expect(rayJob.Status.DashboardURL).NotTo(BeEmpty()) + + // In Running state, the submitter Kubernetes Job must be created if this RayJob is in K8sJobMode. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + }) + + It("RayJobs's JobDeploymentStatus transitions from Running to Complete.", func() { + // Update fake dashboard client to return job info with "Succeeded" status. 
+ getJobInfo := func(context.Context, string) (*utils.RayJobInfo, error) { //nolint:unparam // This is a mock function so parameters are required + return &utils.RayJobInfo{JobStatus: rayv1.JobStatusSucceeded}, nil + } + fakeRayDashboardClient.GetJobInfoMock.Store(&getJobInfo) + defer fakeRayDashboardClient.GetJobInfoMock.Store(nil) + + // RayJob transitions to Complete if and only if the corresponding submitter Kubernetes Job is Complete or Failed. + Consistently( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // Update the submitter Kubernetes Job to Complete. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + + // Update the submitter Kubernetes Job to Complete. + conditions := []batchv1.JobCondition{ + {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}, + } + job.Status.Conditions = conditions + Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed()) + + // RayJob transitions to Complete. 
+ Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusComplete), "jobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) + + It("If shutdownAfterJobFinishes is true, RayCluster should be deleted but not the submitter Job.", func() { + Eventually( + func() bool { + return apierrors.IsNotFound(getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster)()) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + Consistently( + getResourceFunc(ctx, namespacedName, job), + time.Second*3, time.Millisecond*500).Should(BeNil()) + }) + }) + + Describe("Invalid RayJob in K8sJobMode", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayJob := rayJobTemplate("rayjob-invalid-test", namespace) + rayCluster := &rayv1.RayCluster{Spec: *rayJob.Spec.RayClusterSpec} + template := common.GetDefaultSubmitterTemplate(rayCluster) + template.Spec.RestartPolicy = "" // Make it invalid to create a submitter. 
Ref: https://github.com/ray-project/kuberay/pull/2389#issuecomment-2359564334 + rayJob.Spec.SubmitterPodTemplate = &template + + It("Verify RayJob spec", func() { + Expect(rayJob.Spec.SubmissionMode).To(Equal(rayv1.K8sJobMode)) + }) + + It("Create a RayJob custom resource", func() { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name) + }) + + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) + + It("In Initializing state, the RayCluster should eventually be created.", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + }) + + It("Make RayCluster.Status.State to be rayv1.Ready", func() { + // The RayCluster is not 'Ready' yet because Pods are not running and ready. + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. 
+ Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() { + Eventually( + func() ([]corev1.Event, error) { + events := &corev1.EventList{} + if err := k8sClient.List(ctx, events, client.InNamespace(rayJob.Namespace)); err != nil { + return nil, err + } + return events.Items, nil + }, + time.Second*3, time.Millisecond*500).Should(ContainElement(HaveField("Message", ContainSubstring("Failed to create new Kubernetes Job default/rayjob-invalid-test")))) + + _ = k8sClient.Delete(ctx, rayJob) + }) + }) + + Describe("Successful RayJob in K8sjobMode with DELETE_RAYJOB_CR_AFTER_JOB_FINISHES", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayJob := rayJobTemplate("rayjob-test-delete", namespace) + rayCluster := &rayv1.RayCluster{} + + It("Verify RayJob spec", func() { + // This test case simulates the most common scenario in the RayJob code path. + // (1) The submission mode is K8sJobMode. + // (2) `shutdownAfterJobFinishes` is true. + // In this test, RayJob passes through the following states: New -> Initializing -> Running -> Complete + Expect(rayJob.Spec.SubmissionMode).To(Equal(rayv1.K8sJobMode)) + Expect(rayJob.Spec.ShutdownAfterJobFinishes).To(BeTrue()) + + // This test assumes that there is only one worker group. 
+ Expect(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs).To(HaveLen(1)) + }) + + It("Create a RayJob custom resource", func() { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name) + }) + + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Initializing state, Status.RayClusterName, Status.JobId, and Status.StartTime must be set. + Expect(rayJob.Status.RayClusterName).NotTo(BeEmpty()) + Expect(rayJob.Status.JobId).NotTo(BeEmpty()) + Expect(rayJob.Status.StartTime).NotTo(BeNil()) + }) + + It("In Initializing state, the RayCluster should eventually be created.", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Check whether RayCluster is consistent with RayJob's RayClusterSpec. + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs[0].Replicas)) + Expect(rayCluster.Spec.RayVersion).To(Equal(rayJob.Spec.RayClusterSpec.RayVersion)) + + // TODO (kevin85421): Check the RayCluster labels and annotations. 
+ Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRNameLabelKey, rayJob.Name)) + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRDLabelKey, utils.RayOriginatedFromCRDLabelValue(utils.RayJobCRD))) + }) + + It("Make RayCluster.Status.State to be rayv1.Ready", func() { + // The RayCluster is not 'Ready' yet because Pods are not running and ready. + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. + Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Running state, the RayJob's Status.DashboardURL must be set. + Expect(rayJob.Status.DashboardURL).NotTo(BeEmpty()) + + // In Running state, the submitter Kubernetes Job must be created if this RayJob is in K8sJobMode. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + }) + + It("RayJobs's JobDeploymentStatus transitions from Running to Complete.", func() { + // Update fake dashboard client to return job info with "Succeeded" status. 
+ getJobInfo := func(context.Context, string) (*utils.RayJobInfo, error) { //nolint:unparam // This is a mock function so parameters are required + return &utils.RayJobInfo{JobStatus: rayv1.JobStatusSucceeded}, nil + } + fakeRayDashboardClient.GetJobInfoMock.Store(&getJobInfo) + defer fakeRayDashboardClient.GetJobInfoMock.Store(nil) + + // RayJob transitions to Complete if and only if the corresponding submitter Kubernetes Job is Complete or Failed. + Consistently( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // Update the submitter Kubernetes Job to Complete. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + + // Update the submitter Kubernetes Job to Complete. + conditions := []batchv1.JobCondition{ + {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}, + } + job.Status.Conditions = conditions + Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed()) + + // RayJob transitions to Complete. 
+ Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusComplete), "jobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) + + It("If DELETE_RAYJOB_CR_AFTER_JOB_FINISHES environement variable is set, RayJob should be deleted.", func() { + os.Setenv(utils.DELETE_RAYJOB_CR_AFTER_JOB_FINISHES, "true") + defer os.Unsetenv(utils.DELETE_RAYJOB_CR_AFTER_JOB_FINISHES) + Eventually( + func() bool { + return apierrors.IsNotFound(getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob)()) + }, time.Second*3, time.Millisecond*500).Should(BeTrue()) + }) + }) + + Describe("RayJob has passed the ActiveDeadlineSeconds", Ordered, func() { + ctx := context.Background() + namespace := "default" + activeDeadlineSeconds := int32(3) + rayJob := rayJobTemplate("rayjob-deadline", namespace) + rayJob.Spec.ActiveDeadlineSeconds = ptr.To[int32](activeDeadlineSeconds) + + It("Verify RayJob spec", func() { + // In this test, RayJob passes through the following states: New -> Initializing -> Complete (because of ActiveDeadlineSeconds). + Expect(rayJob.Spec.ActiveDeadlineSeconds).NotTo(BeNil()) + + // This test assumes that there is only one worker group. 
+ Expect(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs).To(HaveLen(1)) + }) + + It("Create a RayJob custom resource", func() { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name) + }) + + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Initializing state, Status.RayClusterName, Status.JobId, and Status.StartTime must be set. + Expect(rayJob.Status.RayClusterName).NotTo(BeEmpty()) + Expect(rayJob.Status.JobId).NotTo(BeEmpty()) + Expect(rayJob.Status.StartTime).NotTo(BeNil()) + }) + + It("RayJobs has passed the activeDeadlineSeconds, and the JobDeploymentStatus transitions from Initializing to Complete.", func() { + // RayJob transitions to Complete. + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusFailed), "jobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + Expect(rayJob.Status.Reason).To(Equal(rayv1.DeadlineExceeded)) + }) + }) + + Describe("Retrying RayJob in K8sJobMode", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayJob := rayJobTemplate("rayjob-retry-test", namespace) + rayJob.Spec.BackoffLimit = ptr.To[int32](1) + rayCluster := &rayv1.RayCluster{} + + It("Verify RayJob spec", func() { + // This test case simulates a retry scenario in the RayJob when: + // (1) The submission mode is K8sJobMode. 
+ // (2) backoffLimit > 0 + // In this test, RayJob passes through the following states: New -> Initializing -> Running -> Retrying -> New + Expect(rayJob.Spec.SubmissionMode).To(Equal(rayv1.K8sJobMode)) + Expect(*rayJob.Spec.BackoffLimit).To(Equal(int32(1))) + }) + + It("Create a RayJob custom resource", func() { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name) + }) + + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Initializing state, Status.RayClusterName, Status.JobId, and Status.StartTime must be set. + Expect(rayJob.Status.RayClusterName).NotTo(BeEmpty()) + Expect(rayJob.Status.JobId).NotTo(BeEmpty()) + Expect(rayJob.Status.StartTime).NotTo(BeNil()) + }) + + It("In Initializing state, the RayCluster should eventually be created.", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Check whether RayCluster is consistent with RayJob's RayClusterSpec. + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs[0].Replicas)) + Expect(rayCluster.Spec.RayVersion).To(Equal(rayJob.Spec.RayClusterSpec.RayVersion)) + + // TODO (kevin85421): Check the RayCluster labels and annotations. 
+ Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRNameLabelKey, rayJob.Name)) + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRDLabelKey, utils.RayOriginatedFromCRDLabelValue(utils.RayJobCRD))) + }) + + It("Make RayCluster.Status.State to be rayv1.Ready", func() { + // The RayCluster is not 'Ready' yet because Pods are not running and ready. + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. + Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Running state, the RayJob's Status.DashboardURL must be set. + Expect(rayJob.Status.DashboardURL).NotTo(BeEmpty()) + + // In Running state, the submitter Kubernetes Job must be created if this RayJob is in K8sJobMode. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + }) + + It("RayJobs's JobDeploymentStatus transitions from Running -> Retrying -> New -> Initializing", func() { + // Update fake dashboard client to return job info with "Failed" status. 
+ //nolint:unparam // this is a mock and the function signature cannot change + getJobInfo := func(context.Context, string) (*utils.RayJobInfo, error) { + return &utils.RayJobInfo{JobStatus: rayv1.JobStatusFailed}, nil + } + fakeRayDashboardClient.GetJobInfoMock.Store(&getJobInfo) + defer fakeRayDashboardClient.GetJobInfoMock.Store(nil) + + // RayJob transitions to Complete if and only if the corresponding submitter Kubernetes Job is Complete or Failed. + Consistently( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // Update the submitter Kubernetes Job to Complete. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + + // Update the submitter Kubernetes Job to Complete. + conditions := []batchv1.JobCondition{ + {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}, + } + job.Status.Conditions = conditions + Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed()) + + // record the current cluster name + oldClusterName := rayJob.Status.RayClusterName + + // RayJob transitions from Running -> Retrying -> New -> Initializing + // We only check the final state "Initializing" because it's difficult to test transient states like "Retrying" and "New" + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "jobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // validate the RayCluster is deleted on retry + Eventually( + func() bool { + return apierrors.IsNotFound(getResourceFunc(ctx, client.ObjectKey{Name: oldClusterName, Namespace: namespace}, rayCluster)()) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) + + // validate the submitter Job is deleted 
on retry + Eventually( + func() bool { + return apierrors.IsNotFound(getResourceFunc(ctx, common.RayJobK8sJobNamespacedName(rayJob), job)()) + }, + time.Second*3, time.Millisecond*500).Should(BeTrue()) + }) + + It("In Initializing state, the RayCluster should eventually be created (attempt 2)", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Check whether RayCluster is consistent with RayJob's RayClusterSpec. + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs[0].Replicas)) + Expect(rayCluster.Spec.RayVersion).To(Equal(rayJob.Spec.RayClusterSpec.RayVersion)) + + // TODO (kevin85421): Check the RayCluster labels and annotations. + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRNameLabelKey, rayJob.Name)) + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRDLabelKey, utils.RayOriginatedFromCRDLabelValue(utils.RayJobCRD))) + }) + + It("Make RayCluster.Status.State to be rayv1.Ready (attempt 2)", func() { + // The RayCluster is not 'Ready' yet because Pods are not running and ready. + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. 
+ Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running (attempt 2)", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Running state, the RayJob's Status.DashboardURL must be set. + Expect(rayJob.Status.DashboardURL).NotTo(BeEmpty()) + + // In Running state, the submitter Kubernetes Job must be created if this RayJob is in K8sJobMode. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + }) + + It("RayJobs's JobDeploymentStatus transitions from Running -> Complete (attempt 2)", func() { + // Update fake dashboard client to return job info with "Succeeded" status, so this second attempt completes. + //nolint:unparam // this is a mock and the function signature cannot change + getJobInfo := func(context.Context, string) (*utils.RayJobInfo, error) { + return &utils.RayJobInfo{JobStatus: rayv1.JobStatusSucceeded}, nil + } + fakeRayDashboardClient.GetJobInfoMock.Store(&getJobInfo) + defer fakeRayDashboardClient.GetJobInfoMock.Store(nil) + + // RayJob transitions to Complete if and only if the corresponding submitter Kubernetes Job is Complete or Failed. + Consistently( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // Update the submitter Kubernetes Job to Complete.
+ namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + + // Update the submitter Kubernetes Job to Complete. + conditions := []batchv1.JobCondition{ + {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}, + } + job.Status.Conditions = conditions + Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed()) + + // RayJob transitions from Running -> Complete + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusComplete), "jobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) + + It("Validate RayJob succeeded and failed status", func() { + Eventually( + getRayJobSucceededStatus(ctx, rayJob), + time.Second*5, time.Millisecond*500).Should(Equal(int32(1)), "succeeded = %v", rayJob.Status.Succeeded) + + Eventually( + getRayJobFailedStatus(ctx, rayJob), + time.Second*5, time.Millisecond*500).Should(Equal(int32(1)), "failed = %v", rayJob.Status.Failed) + }) + }) + }) + + Context("RayJob in InteractiveMode", func() { + Describe("Successful RayJob", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayJob := rayJobTemplate("rayjob-test-none-mode", namespace) + rayJob.Spec.SubmissionMode = rayv1.InteractiveMode + rayCluster := &rayv1.RayCluster{} + testRayJobId := "fake-id" + + It("Verify RayJob spec", func() { + Expect(rayJob.Spec.SubmissionMode).To(Equal(rayv1.InteractiveMode)) + Expect(rayJob.Spec.ShutdownAfterJobFinishes).To(BeTrue()) + Expect(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs).To(HaveLen(1)) + }) + + It("Create a RayJob custom resource", func() { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, 
time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name) + }) + + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + Expect(rayJob.Status.RayClusterName).NotTo(BeEmpty()) + Expect(rayJob.Status.JobId).To(BeEmpty()) + Expect(rayJob.Status.StartTime).NotTo(BeNil()) + }) + + It("In Initializing state, the RayCluster should eventually be created.", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Check whether RayCluster is consistent with RayJob's RayClusterSpec. + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs[0].Replicas)) + Expect(rayCluster.Spec.RayVersion).To(Equal(rayJob.Spec.RayClusterSpec.RayVersion)) + + // TODO (kevin85421): Check the RayCluster labels and annotations. + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRNameLabelKey, rayJob.Name)) + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRDLabelKey, utils.RayOriginatedFromCRDLabelValue(utils.RayJobCRD))) + }) + + It("Make RayCluster.Status.State to be rayv1.Ready", func() { + // The RayCluster is not 'Ready' yet because Pods are not running and ready. + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. 
+ Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Waiting.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusWaiting), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) + + It("sets jobId in RayJob", func() { + err := setJobIdOnRayJob(ctx, rayJob, testRayJobId) + Expect(err).NotTo(HaveOccurred()) + }) + + It("RayJobs's JobDeploymentStatus transitions from Waiting to Running if annotation is set.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) + + It("should set RayJob's JobId to the value of the annotation", func() { + Expect(rayJob.Status.JobId).To(Equal(testRayJobId)) + }) + }) + }) + + Describe("RayJob with DeletionPolicy=DeleteCluster", Ordered, func() { ctx := context.Background() namespace := "default" - rayJob := rayJobTemplate("rayjob-test", namespace) + rayJob := rayJobTemplate("rayjob-test-deletionpolicy-deletecluster", namespace) + deletionPolicy := rayv1.DeleteClusterDeletionPolicy + rayJob.Spec.DeletionPolicy = &deletionPolicy + rayJob.Spec.ShutdownAfterJobFinishes = false rayCluster := &rayv1.RayCluster{} - It("Verify RayJob spec", func() { - // This test case simulates the most common scenario in the RayJob code path. - // (1) The submission mode is K8sJobMode. - // (2) `shutdownAfterJobFinishes` is true. 
- // In this test, RayJob passes through the following states: New -> Initializing -> Running -> Complete - Expect(rayJob.Spec.SubmissionMode).To(Equal(rayv1.K8sJobMode)) - Expect(rayJob.Spec.ShutdownAfterJobFinishes).To(BeTrue()) + BeforeAll(func() { + DeferCleanup(features.SetFeatureGateDuringTest(GinkgoTB(), features.RayJobDeletionPolicy, true)) + }) - // This test assumes that there is only one worker group. - Expect(len(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs)).To(Equal(1)) + It("Verify RayJob spec", func() { + Expect(*rayJob.Spec.DeletionPolicy).To(Equal(rayv1.DeleteClusterDeletionPolicy)) }) It("Create a RayJob custom resource", func() { @@ -166,34 +889,10 @@ var _ = Context("RayJob in K8sJobMode", func() { It("Make RayCluster.Status.State to be rayv1.Ready", func() { // The RayCluster is not 'Ready' yet because Pods are not running and ready. - Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) - allPods := []corev1.Pod{} + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 - // Check whether the number of worker Pods is consistent with RayCluster CR or not. - numWorkerPods := int(*rayCluster.Spec.WorkerGroupSpecs[0].Replicas) - workerFilterLabels := client.MatchingLabels{utils.RayClusterLabelKey: rayCluster.Name, utils.RayNodeGroupLabelKey: rayCluster.Spec.WorkerGroupSpecs[0].GroupName} - workerPods := corev1.PodList{} - Eventually( - listResourceFunc(ctx, &workerPods, workerFilterLabels, &client.ListOptions{Namespace: namespace}), - time.Second*3, time.Millisecond*500).Should(Equal(int(numWorkerPods)), fmt.Sprintf("workerGroup: %v", workerPods.Items)) - - // The number of head Pods should be 1. 
- headPods := corev1.PodList{} - headFilterLabels := client.MatchingLabels{utils.RayClusterLabelKey: rayCluster.Name, utils.RayNodeGroupLabelKey: utils.RayNodeHeadGroupLabelValue} - Eventually( - listResourceFunc(ctx, &headPods, headFilterLabels, &client.ListOptions{Namespace: namespace}), - time.Second*3, time.Millisecond*500).Should(Equal(1), fmt.Sprintf("head Pod: %v", headPods.Items)) - - // Update all Pods, including head and worker Pods, to Running and PodReady. - allPods = append(allPods, headPods.Items...) - allPods = append(allPods, workerPods.Items...) - - for _, pod := range allPods { - pod.Status.Phase = corev1.PodRunning - // In envtest, if Pod.Status.Phase is set to running, the Pod's PodReady condition becomes true automatically. - // Check https://github.com/ray-project/kuberay/issues/1736 for more details. - Expect(k8sClient.Status().Update(ctx, &pod)).Should(BeNil()) - } + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) // The RayCluster.Status.State should be Ready. Eventually( @@ -218,7 +917,7 @@ var _ = Context("RayJob in K8sJobMode", func() { It("RayJobs's JobDeploymentStatus transitions from Running to Complete.", func() { // Update fake dashboard client to return job info with "Succeeded" status. 
- getJobInfo := func(context.Context, string) (*utils.RayJobInfo, error) { + getJobInfo := func(context.Context, string) (*utils.RayJobInfo, error) { //nolint:unparam // This is a mock function so parameters are required return &utils.RayJobInfo{JobStatus: rayv1.JobStatusSucceeded}, nil } fakeRayDashboardClient.GetJobInfoMock.Store(&getJobInfo) @@ -240,7 +939,7 @@ var _ = Context("RayJob in K8sJobMode", func() { {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}, } job.Status.Conditions = conditions - Expect(k8sClient.Status().Update(ctx, job)).Should(BeNil()) + Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed()) // RayJob transitions to Complete. Eventually( @@ -248,7 +947,7 @@ var _ = Context("RayJob in K8sJobMode", func() { time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusComplete), "jobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) }) - It("If shutdownAfterJobFinishes is true, RayCluster should be deleted but not the submitter Job.", func() { + It("If DeletionPolicy=DeleteCluster, RayCluster should be deleted, but not the submitter Job.", func() { Eventually( func() bool { return apierrors.IsNotFound(getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster)()) @@ -262,19 +961,270 @@ var _ = Context("RayJob in K8sJobMode", func() { }) }) - Describe("RayJob has passed the ActiveDeadlineSeconds", func() { + Describe("RayJob with DeletionPolicy=DeleteWorkers", Ordered, func() { ctx := context.Background() namespace := "default" - activeDeadlineSeconds := int32(3) - rayJob := rayJobTemplate("rayjob-deadline", namespace) - rayJob.Spec.ActiveDeadlineSeconds = pointer.Int32(activeDeadlineSeconds) + rayJob := rayJobTemplate("rayjob-test-deletionpolicy-deleteworkers", namespace) + deletionPolicy := rayv1.DeleteWorkersDeletionPolicy + rayJob.Spec.DeletionPolicy = &deletionPolicy + rayJob.Spec.ShutdownAfterJobFinishes = false + rayCluster := &rayv1.RayCluster{} + + 
BeforeAll(func() { + DeferCleanup(features.SetFeatureGateDuringTest(GinkgoTB(), features.RayJobDeletionPolicy, true)) + }) It("Verify RayJob spec", func() { - // In this test, RayJob passes through the following states: New -> Initializing -> Complete (because of ActiveDeadlineSeconds). - Expect(rayJob.Spec.ActiveDeadlineSeconds).NotTo(BeNil()) + Expect(*rayJob.Spec.DeletionPolicy).To(Equal(rayv1.DeleteWorkersDeletionPolicy)) + }) + + It("Create a RayJob custom resource", func() { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name) + }) + + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Initializing state, Status.RayClusterName, Status.JobId, and Status.StartTime must be set. + Expect(rayJob.Status.RayClusterName).NotTo(BeEmpty()) + Expect(rayJob.Status.JobId).NotTo(BeEmpty()) + Expect(rayJob.Status.StartTime).NotTo(BeNil()) + }) + + It("In Initializing state, the RayCluster should eventually be created.", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Check whether RayCluster is consistent with RayJob's RayClusterSpec. 
+ Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs[0].Replicas)) + Expect(rayCluster.Spec.RayVersion).To(Equal(rayJob.Spec.RayClusterSpec.RayVersion)) + + // TODO (kevin85421): Check the RayCluster labels and annotations. + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRNameLabelKey, rayJob.Name)) + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRDLabelKey, utils.RayOriginatedFromCRDLabelValue(utils.RayJobCRD))) + }) - // This test assumes that there is only one worker group. - Expect(len(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs)).To(Equal(1)) + It("Make RayCluster.Status.State to be rayv1.Ready", func() { + // The RayCluster is not 'Ready' yet because Pods are not running and ready. + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. + Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Running state, the RayJob's Status.DashboardURL must be set. + Expect(rayJob.Status.DashboardURL).NotTo(BeEmpty()) + + // In Running state, the submitter Kubernetes Job must be created if this RayJob is in K8sJobMode. 
+ namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + }) + + It("RayJobs's JobDeploymentStatus transitions from Running to Complete.", func() { + // Update fake dashboard client to return job info with "Succeeded" status. + getJobInfo := func(context.Context, string) (*utils.RayJobInfo, error) { //nolint:unparam // This is a mock function so parameters are required + return &utils.RayJobInfo{JobStatus: rayv1.JobStatusSucceeded}, nil + } + fakeRayDashboardClient.GetJobInfoMock.Store(&getJobInfo) + defer fakeRayDashboardClient.GetJobInfoMock.Store(nil) + + // RayJob transitions to Complete if and only if the corresponding submitter Kubernetes Job is Complete or Failed. + Consistently( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // Update the submitter Kubernetes Job to Complete. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + + // Update the submitter Kubernetes Job to Complete. + conditions := []batchv1.JobCondition{ + {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}, + } + job.Status.Conditions = conditions + Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed()) + + // RayJob transitions to Complete. 
+ Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusComplete), "jobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) + + It("If DeletionPolicy=DeleteWorkers, all workers should be deleted, but not the Head pod and submitter Job", func() { + // RayCluster exists + Consistently( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Check worker group is suspended + Expect(*rayCluster.Spec.WorkerGroupSpecs[0].Suspend).To(BeTrue()) + + // 0 worker Pods exist + workerPods := corev1.PodList{} + workerLabels := common.RayClusterWorkerPodsAssociationOptions(rayCluster).ToListOptions() + Eventually( + listResourceFunc(ctx, &workerPods, workerLabels...), + time.Second*3, time.Millisecond*500).Should(Equal(0), "expected 0 workers") + + // Head Pod is still running + headPods := corev1.PodList{} + headLabels := common.RayClusterHeadPodsAssociationOptions(rayCluster).ToListOptions() + Consistently( + listResourceFunc(ctx, &headPods, headLabels...), + time.Second*3, time.Millisecond*500).Should(Equal(1), "Head pod list should have only 1 Pod = %v", headPods.Items) + + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + Consistently( + getResourceFunc(ctx, namespacedName, job), + time.Second*3, time.Millisecond*500).Should(BeNil()) + }) + }) + + Describe("RayJob with DeletionPolicy=DeleteSelf", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayJob := rayJobTemplate("rayjob-test-deleteself", namespace) + deletionPolicy := rayv1.DeleteSelfDeletionPolicy + rayJob.Spec.DeletionPolicy = &deletionPolicy + rayJob.Spec.ShutdownAfterJobFinishes = false + rayCluster := &rayv1.RayCluster{} + + BeforeAll(func() { + 
DeferCleanup(features.SetFeatureGateDuringTest(GinkgoTB(), features.RayJobDeletionPolicy, true)) + }) + + It("Create a RayJob custom resource", func() { + err := k8sClient.Create(ctx, rayJob) + Expect(err).NotTo(HaveOccurred(), "Failed to create RayJob") + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "Should be able to see RayJob: %v", rayJob.Name) + }) + + It("RayJobs's JobDeploymentStatus transitions from New to Initializing.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusInitializing), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Initializing state, Status.RayClusterName, Status.JobId, and Status.StartTime must be set. + Expect(rayJob.Status.RayClusterName).NotTo(BeEmpty()) + Expect(rayJob.Status.JobId).NotTo(BeEmpty()) + Expect(rayJob.Status.StartTime).NotTo(BeNil()) + }) + + It("In Initializing state, the RayCluster should eventually be created.", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Check whether RayCluster is consistent with RayJob's RayClusterSpec. + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs[0].Replicas)) + Expect(rayCluster.Spec.RayVersion).To(Equal(rayJob.Spec.RayClusterSpec.RayVersion)) + + // TODO (kevin85421): Check the RayCluster labels and annotations. 
+ Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRNameLabelKey, rayJob.Name)) + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRDLabelKey, utils.RayOriginatedFromCRDLabelValue(utils.RayJobCRD))) + }) + + It("Make RayCluster.Status.State to be rayv1.Ready", func() { + // The RayCluster is not 'Ready' yet because Pods are not running and ready. + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. + Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Running state, the RayJob's Status.DashboardURL must be set. + Expect(rayJob.Status.DashboardURL).NotTo(BeEmpty()) + + // In Running state, the submitter Kubernetes Job must be created if this RayJob is in K8sJobMode. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + }) + + It("RayJobs's JobDeploymentStatus transitions from Running to Complete.", func() { + // Update fake dashboard client to return job info with "Succeeded" status. 
+ getJobInfo := func(context.Context, string) (*utils.RayJobInfo, error) { //nolint:unparam // This is a mock function so parameters are required + return &utils.RayJobInfo{JobStatus: rayv1.JobStatusSucceeded}, nil + } + fakeRayDashboardClient.GetJobInfoMock.Store(&getJobInfo) + // NOTE(review): unlike the sibling specs, the mock is not reset here with a deferred Store(nil). + // Presumably it must stay installed until the next spec observes the RayJob deletion - TODO confirm, + // and make sure the mock is reset afterwards so it does not leak into later specs. + + // RayJob transitions to Complete if and only if the corresponding submitter Kubernetes Job is Complete or Failed. + Consistently( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // Update the submitter Kubernetes Job to Complete. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + + // Update the submitter Kubernetes Job to Complete. + conditions := []batchv1.JobCondition{ + {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}, + } + job.Status.Conditions = conditions + Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed()) + }) + + It("If DeletionPolicy=DeleteSelf, the RayJob is deleted", func() { + Eventually( + func() bool { + return apierrors.IsNotFound(k8sClient.Get(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob)) + }, time.Second*5, time.Millisecond*500).Should(BeTrue()) + }) + }) + + Describe("RayJob with DeletionPolicy=DeleteNone", Ordered, func() { + ctx := context.Background() + namespace := "default" + rayJob := rayJobTemplate("rayjob-test-deletionpolicy-deletenone", namespace) + deletionPolicy := rayv1.DeleteNoneDeletionPolicy + rayJob.Spec.DeletionPolicy = &deletionPolicy + rayJob.Spec.ShutdownAfterJobFinishes = false + rayCluster := &rayv1.RayCluster{} + + BeforeAll(func() { + DeferCleanup(features.SetFeatureGateDuringTest(GinkgoTB(), features.RayJobDeletionPolicy, true)) + }) + + It("Verify RayJob spec", func() { +
Expect(*rayJob.Spec.DeletionPolicy).To(Equal(rayv1.DeleteNoneDeletionPolicy)) }) It("Create a RayJob custom resource", func() { @@ -296,12 +1246,113 @@ var _ = Context("RayJob in K8sJobMode", func() { Expect(rayJob.Status.StartTime).NotTo(BeNil()) }) - It("RayJobs has passed the activeDeadlineSeconds, and the JobDeploymentStatus transitions from Initializing to Complete.", func() { + It("In Initializing state, the RayCluster should eventually be created.", func() { + Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Check whether RayCluster is consistent with RayJob's RayClusterSpec. + Expect(rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(rayJob.Spec.RayClusterSpec.WorkerGroupSpecs[0].Replicas)) + Expect(rayCluster.Spec.RayVersion).To(Equal(rayJob.Spec.RayClusterSpec.RayVersion)) + + // TODO (kevin85421): Check the RayCluster labels and annotations. + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRNameLabelKey, rayJob.Name)) + Expect(rayCluster.Labels).Should(HaveKeyWithValue(utils.RayOriginatedFromCRDLabelKey, utils.RayOriginatedFromCRDLabelValue(utils.RayJobCRD))) + }) + + It("Make RayCluster.Status.State to be rayv1.Ready", func() { + // The RayCluster is not 'Ready' yet because Pods are not running and ready. + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + + updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) + + // The RayCluster.Status.State should be Ready. 
+ Eventually( + getClusterState(ctx, namespace, rayCluster.Name), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.Ready)) + }) + + It("RayJobs's JobDeploymentStatus transitions from Initializing to Running.", func() { + Eventually( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // In Running state, the RayJob's Status.DashboardURL must be set. + Expect(rayJob.Status.DashboardURL).NotTo(BeEmpty()) + + // In Running state, the submitter Kubernetes Job must be created if this RayJob is in K8sJobMode. + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + }) + + It("RayJobs's JobDeploymentStatus transitions from Running to Complete.", func() { + // Update fake dashboard client to return job info with "Succeeded" status. + getJobInfo := func(context.Context, string) (*utils.RayJobInfo, error) { //nolint:unparam // This is a mock function so parameters are required + return &utils.RayJobInfo{JobStatus: rayv1.JobStatusSucceeded}, nil + } + fakeRayDashboardClient.GetJobInfoMock.Store(&getJobInfo) + defer fakeRayDashboardClient.GetJobInfoMock.Store(nil) + + // RayJob transitions to Complete if and only if the corresponding submitter Kubernetes Job is Complete or Failed. + Consistently( + getRayJobDeploymentStatus(ctx, rayJob), + time.Second*3, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusRunning), "JobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + + // Update the submitter Kubernetes Job to Complete. 
+ namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + err := k8sClient.Get(ctx, namespacedName, job) + Expect(err).NotTo(HaveOccurred(), "failed to get Kubernetes Job") + + // Update the submitter Kubernetes Job to Complete. + conditions := []batchv1.JobCondition{ + {Type: batchv1.JobComplete, Status: corev1.ConditionTrue}, + } + job.Status.Conditions = conditions + Expect(k8sClient.Status().Update(ctx, job)).Should(Succeed()) + // RayJob transitions to Complete. Eventually( getRayJobDeploymentStatus(ctx, rayJob), - time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusFailed), "jobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) - Expect(rayJob.Status.Reason).To(Equal(rayv1.DeadlineExceeded)) + time.Second*5, time.Millisecond*500).Should(Equal(rayv1.JobDeploymentStatusComplete), "jobDeploymentStatus = %v", rayJob.Status.JobDeploymentStatus) + }) + + It("If DeletionPolicy=DeleteNone, no resources are deleted", func() { + // RayJob exists + Consistently( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: namespace}, rayJob), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayJob %v not found", rayJob) + + // RayCluster exists + Consistently( + getResourceFunc(ctx, client.ObjectKey{Name: rayJob.Status.RayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(BeNil(), "RayCluster %v not found", rayJob.Status.RayClusterName) + + // Worker replicas set to 3 + Expect(*rayCluster.Spec.WorkerGroupSpecs[0].Replicas).To(Equal(int32(3))) + + // 3 worker Pods exist + workerPods := corev1.PodList{} + workerLabels := common.RayClusterWorkerPodsAssociationOptions(rayCluster).ToListOptions() + Consistently( + listResourceFunc(ctx, &workerPods, workerLabels...), + time.Second*3, time.Millisecond*500).Should(Equal(3), "expected 3 workers") + + // Head Pod is still running + headPods := corev1.PodList{} + headLabels := 
common.RayClusterHeadPodsAssociationOptions(rayCluster).ToListOptions() + Consistently( + listResourceFunc(ctx, &headPods, headLabels...), + time.Second*3, time.Millisecond*500).Should(Equal(1), "Head pod list should have only 1 Pod = %v", headPods.Items) + + namespacedName := common.RayJobK8sJobNamespacedName(rayJob) + job := &batchv1.Job{} + Consistently( + getResourceFunc(ctx, namespacedName, job), + time.Second*3, time.Millisecond*500).Should(BeNil()) }) }) }) diff --git a/ray-operator/controllers/ray/rayjob_controller_unit_test.go b/ray-operator/controllers/ray/rayjob_controller_unit_test.go index 44227f62ce0..bb8c05f0423 100644 --- a/ray-operator/controllers/ray/rayjob_controller_unit_test.go +++ b/ray-operator/controllers/ray/rayjob_controller_unit_test.go @@ -2,10 +2,10 @@ package ray import ( "context" + "errors" + "strings" "testing" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - utils "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "github.com/stretchr/testify/assert" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -13,10 +13,18 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + utils "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme" + "github.com/ray-project/kuberay/ray-operator/pkg/features" ) -func TestCreateK8sJobIfNeed(t *testing.T) { +func TestCreateRayJobSubmitterIfNeed(t *testing.T) { newScheme := runtime.NewScheme() _ = rayv1.AddToScheme(newScheme) _ = batchv1.AddToScheme(newScheme) @@ -147,28 +155,29 @@ func TestGetSubmitterTemplate(t *testing.T) { }, } - r := &RayJobReconciler{} 
ctx := context.Background() // Test 1: User provided template with command - submitterTemplate, err := r.getSubmitterTemplate(ctx, rayJobInstanceWithTemplate, nil) + submitterTemplate, err := getSubmitterTemplate(ctx, rayJobInstanceWithTemplate, nil) assert.NoError(t, err) assert.Equal(t, "user-command", submitterTemplate.Spec.Containers[utils.RayContainerIndex].Command[0]) // Test 2: User provided template without command rayJobInstanceWithTemplate.Spec.SubmitterPodTemplate.Spec.Containers[utils.RayContainerIndex].Command = []string{} - submitterTemplate, err = r.getSubmitterTemplate(ctx, rayJobInstanceWithTemplate, nil) + submitterTemplate, err = getSubmitterTemplate(ctx, rayJobInstanceWithTemplate, nil) assert.NoError(t, err) - assert.Equal(t, []string{"ray", "job", "submit", "--address", "http://test-url", "--submission-id", "test-job-id", "--", "echo", "hello", "world"}, submitterTemplate.Spec.Containers[utils.RayContainerIndex].Command) + assert.Equal(t, []string{"/bin/sh"}, submitterTemplate.Spec.Containers[utils.RayContainerIndex].Command) + assert.Equal(t, []string{"-c", "if ray job status --address http://test-url test-job-id >/dev/null 2>&1 ; then ray job logs --address http://test-url --follow test-job-id ; else ray job submit --address http://test-url --submission-id test-job-id -- echo hello world ; fi"}, submitterTemplate.Spec.Containers[utils.RayContainerIndex].Args) // Test 3: User did not provide template, should use the image of the Ray Head - submitterTemplate, err = r.getSubmitterTemplate(ctx, rayJobInstanceWithoutTemplate, rayClusterInstance) + submitterTemplate, err = getSubmitterTemplate(ctx, rayJobInstanceWithoutTemplate, rayClusterInstance) assert.NoError(t, err) - assert.Equal(t, []string{"ray", "job", "submit", "--address", "http://test-url", "--submission-id", "test-job-id", "--", "echo", "hello", "world"}, submitterTemplate.Spec.Containers[utils.RayContainerIndex].Command) + assert.Equal(t, []string{"/bin/sh"}, 
submitterTemplate.Spec.Containers[utils.RayContainerIndex].Command) + assert.Equal(t, []string{"-c", "if ray job status --address http://test-url test-job-id >/dev/null 2>&1 ; then ray job logs --address http://test-url --follow test-job-id ; else ray job submit --address http://test-url --submission-id test-job-id -- echo hello world ; fi"}, submitterTemplate.Spec.Containers[utils.RayContainerIndex].Args) assert.Equal(t, "rayproject/ray:custom-version", submitterTemplate.Spec.Containers[utils.RayContainerIndex].Image) // Test 4: Check default PYTHONUNBUFFERED setting - submitterTemplate, err = r.getSubmitterTemplate(ctx, rayJobInstanceWithoutTemplate, rayClusterInstance) + submitterTemplate, err = getSubmitterTemplate(ctx, rayJobInstanceWithoutTemplate, rayClusterInstance) assert.NoError(t, err) envVar, found := utils.EnvVarByName(PythonUnbufferedEnvVarName, submitterTemplate.Spec.Containers[utils.RayContainerIndex].Env) @@ -176,7 +185,7 @@ func TestGetSubmitterTemplate(t *testing.T) { assert.Equal(t, "1", envVar.Value) // Test 5: Check default RAY_DASHBOARD_ADDRESS env var - submitterTemplate, err = r.getSubmitterTemplate(ctx, rayJobInstanceWithTemplate, nil) + submitterTemplate, err = getSubmitterTemplate(ctx, rayJobInstanceWithTemplate, nil) assert.NoError(t, err) envVar, found = utils.EnvVarByName(utils.RAY_DASHBOARD_ADDRESS, submitterTemplate.Spec.Containers[utils.RayContainerIndex].Env) @@ -193,8 +202,8 @@ func TestUpdateStatusToSuspendingIfNeeded(t *testing.T) { newScheme := runtime.NewScheme() _ = rayv1.AddToScheme(newScheme) tests := map[string]struct { - suspend bool status rayv1.JobDeploymentStatus + suspend bool expectedShouldUpdate bool }{ // When Autoscaler is enabled, the random Pod deletion is controleld by the feature flag `ENABLE_RANDOM_POD_DELETE`. @@ -232,20 +241,8 @@ func TestUpdateStatusToSuspendingIfNeeded(t *testing.T) { }, } - // Initialize a fake client with newScheme and runtimeObjects. - fakeClient := clientFake.NewClientBuilder(). 
- WithScheme(newScheme). - WithRuntimeObjects(rayJob). - WithStatusSubresource(rayJob).Build() ctx := context.Background() - - // Initialize a new RayClusterReconciler. - testRayJobReconciler := &RayJobReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: newScheme, - } - shouldUpdate := testRayJobReconciler.updateStatusToSuspendingIfNeeded(ctx, rayJob) + shouldUpdate := updateStatusToSuspendingIfNeeded(ctx, rayJob) assert.Equal(t, tc.expectedShouldUpdate, shouldUpdate) if tc.expectedShouldUpdate { @@ -325,7 +322,7 @@ func TestUpdateRayJobStatus(t *testing.T) { func TestValidateRayJobSpec(t *testing.T) { err := validateRayJobSpec(&rayv1.RayJob{}) - assert.Error(t, err, "The RayJob is invalid because both `RayClusterSpec` and `ClusterSelector` are empty") + assert.ErrorContains(t, err, "one of RayClusterSpec or ClusterSelector must be set") err = validateRayJobSpec(&rayv1.RayJob{ Spec: rayv1.RayJobSpec{ @@ -333,7 +330,7 @@ func TestValidateRayJobSpec(t *testing.T) { ShutdownAfterJobFinishes: false, }, }) - assert.Error(t, err, "The RayJob is invalid because a RayJob with shutdownAfterJobFinishes set to false is not allowed to be suspended.") + assert.ErrorContains(t, err, "a RayJob with shutdownAfterJobFinishes set to false is not allowed to be suspended") err = validateRayJobSpec(&rayv1.RayJob{ Spec: rayv1.RayJobSpec{ @@ -342,22 +339,296 @@ func TestValidateRayJobSpec(t *testing.T) { RayClusterSpec: &rayv1.RayClusterSpec{}, }, }) - assert.NoError(t, err, "The RayJob is valid.") + assert.NoError(t, err) err = validateRayJobSpec(&rayv1.RayJob{ Spec: rayv1.RayJobSpec{ - Suspend: true, + Suspend: true, + ShutdownAfterJobFinishes: true, ClusterSelector: map[string]string{ "key": "value", }, }, }) - assert.Error(t, err, "The RayJob is invalid because the ClusterSelector mode doesn't support the suspend operation.") + assert.ErrorContains(t, err, "the ClusterSelector mode doesn't support the suspend operation") err = 
validateRayJobSpec(&rayv1.RayJob{ Spec: rayv1.RayJobSpec{ RuntimeEnvYAML: "invalid_yaml_str", + RayClusterSpec: &rayv1.RayClusterSpec{}, + }, + }) + assert.ErrorContains(t, err, "failed to unmarshal RuntimeEnvYAML") + + err = validateRayJobSpec(&rayv1.RayJob{ + Spec: rayv1.RayJobSpec{ + BackoffLimit: ptr.To[int32](-1), + RayClusterSpec: &rayv1.RayClusterSpec{}, + }, + }) + assert.ErrorContains(t, err, "backoffLimit must be a positive integer") + + err = validateRayJobSpec(&rayv1.RayJob{ + Spec: rayv1.RayJobSpec{ + DeletionPolicy: ptr.To(rayv1.DeleteClusterDeletionPolicy), + ShutdownAfterJobFinishes: true, + RayClusterSpec: &rayv1.RayClusterSpec{}, + }, + }) + assert.ErrorContains(t, err, "RayJobDeletionPolicy feature gate must be enabled to use the DeletionPolicy feature") + + defer features.SetFeatureGateDuringTest(t, features.RayJobDeletionPolicy, true)() + + err = validateRayJobSpec(&rayv1.RayJob{ + Spec: rayv1.RayJobSpec{ + DeletionPolicy: ptr.To(rayv1.DeleteClusterDeletionPolicy), + ClusterSelector: map[string]string{"key": "value"}, + }, + }) + assert.ErrorContains(t, err, "the ClusterSelector mode doesn't support DeletionPolicy=DeleteCluster") + + err = validateRayJobSpec(&rayv1.RayJob{ + Spec: rayv1.RayJobSpec{ + DeletionPolicy: ptr.To(rayv1.DeleteWorkersDeletionPolicy), + ClusterSelector: map[string]string{"key": "value"}, + }, + }) + assert.ErrorContains(t, err, "the ClusterSelector mode doesn't support DeletionPolicy=DeleteWorkers") + + err = validateRayJobSpec(&rayv1.RayJob{ + Spec: rayv1.RayJobSpec{ + DeletionPolicy: ptr.To(rayv1.DeleteWorkersDeletionPolicy), + RayClusterSpec: &rayv1.RayClusterSpec{ + EnableInTreeAutoscaling: ptr.To[bool](true), + }, + }, + }) + assert.ErrorContains(t, err, "DeletionPolicy=DeleteWorkers currently does not support RayCluster with autoscaling enabled") + + err = validateRayJobSpec(&rayv1.RayJob{ + Spec: rayv1.RayJobSpec{ + DeletionPolicy: ptr.To(rayv1.DeleteClusterDeletionPolicy), + ShutdownAfterJobFinishes: true, + 
RayClusterSpec: &rayv1.RayClusterSpec{}, }, }) - assert.Error(t, err, "The RayJob is invalid because the runtimeEnvYAML is invalid.") + assert.NoError(t, err) + + err = validateRayJobSpec(&rayv1.RayJob{ + Spec: rayv1.RayJobSpec{ + DeletionPolicy: nil, + ShutdownAfterJobFinishes: true, + RayClusterSpec: &rayv1.RayClusterSpec{}, + }, + }) + assert.NoError(t, err) + + err = validateRayJobSpec(&rayv1.RayJob{ + Spec: rayv1.RayJobSpec{ + DeletionPolicy: ptr.To(rayv1.DeleteNoneDeletionPolicy), + ShutdownAfterJobFinishes: true, + RayClusterSpec: &rayv1.RayClusterSpec{}, + }, + }) + assert.ErrorContains(t, err, "shutdownAfterJobFinshes is set to 'true' while deletion policy is 'DeleteNone'") +} + +func TestFailedToCreateRayJobSubmitterEvent(t *testing.T) { + rayJob := &rayv1.RayJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rayjob", + Namespace: "default", + }, + } + + submitterTemplate := corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-submit-pod", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "ray-submit", + Image: "rayproject/ray:latest", + }, + }, + }, + } + + fakeClient := clientFake.NewClientBuilder().WithInterceptorFuncs(interceptor.Funcs{ + Create: func(_ context.Context, _ client.WithWatch, _ client.Object, _ ...client.CreateOption) error { + return errors.New("random") + }, + }).WithScheme(scheme.Scheme).Build() + + recorder := record.NewFakeRecorder(100) + + reconciler := &RayJobReconciler{ + Client: fakeClient, + Recorder: recorder, + Scheme: scheme.Scheme, + } + + err := reconciler.createNewK8sJob(context.Background(), rayJob, submitterTemplate) + + assert.NotNil(t, err, "Expected error due to simulated job creation failure") + + var foundFailureEvent bool + events := []string{} + for len(recorder.Events) > 0 { + event := <-recorder.Events + if strings.Contains(event, "Failed to create new Kubernetes Job") { + foundFailureEvent = true + break + } + events = append(events, event) + 
} + + assert.Truef(t, foundFailureEvent, "Expected event to be generated for job creation failure, got events: %s", strings.Join(events, "\n")) +} + +func TestFailedCreateRayClusterEvent(t *testing.T) { + rayJob := &rayv1.RayJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rayjob", + Namespace: "default", + }, + Spec: rayv1.RayJobSpec{ + RayClusterSpec: &rayv1.RayClusterSpec{}, + }, + } + + fakeClient := clientFake.NewClientBuilder().WithInterceptorFuncs(interceptor.Funcs{ + Create: func(_ context.Context, _ client.WithWatch, _ client.Object, _ ...client.CreateOption) error { + return errors.New("random") + }, + }).WithScheme(scheme.Scheme).Build() + + recorder := record.NewFakeRecorder(100) + + reconciler := &RayJobReconciler{ + Client: fakeClient, + Recorder: recorder, + Scheme: scheme.Scheme, + } + + _, err := reconciler.getOrCreateRayClusterInstance(context.Background(), rayJob) + + assert.NotNil(t, err, "Expected error due to cluster creation failure") + + var foundFailureEvent bool + events := []string{} + for len(recorder.Events) > 0 { + event := <-recorder.Events + if strings.Contains(event, "Failed to create RayCluster") { + foundFailureEvent = true + break + } + events = append(events, event) + } + + assert.Truef(t, foundFailureEvent, "Expected event to be generated for cluster creation failure, got events: %s", strings.Join(events, "\n")) +} + +func TestFailedDeleteRayJobSubmitterEvent(t *testing.T) { + newScheme := runtime.NewScheme() + _ = batchv1.AddToScheme(newScheme) + + rayJob := &rayv1.RayJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rayjob", + Namespace: "default", + }, + } + submitter := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rayjob", + Namespace: "default", + }, + } + + fakeClient := clientFake.NewClientBuilder().WithInterceptorFuncs(interceptor.Funcs{ + Delete: func(_ context.Context, _ client.WithWatch, _ client.Object, _ ...client.DeleteOption) error { + return errors.New("random") + }, + 
}).WithScheme(newScheme).WithRuntimeObjects(submitter).Build() + + recorder := record.NewFakeRecorder(100) + + reconciler := &RayJobReconciler{ + Client: fakeClient, + Recorder: recorder, + Scheme: scheme.Scheme, + } + + _, err := reconciler.deleteSubmitterJob(context.Background(), rayJob) + + assert.NotNil(t, err, "Expected error due to job deletion failure") + + var foundFailureEvent bool + events := []string{} + for len(recorder.Events) > 0 { + event := <-recorder.Events + if strings.Contains(event, "Failed to delete submitter K8s Job") { + foundFailureEvent = true + break + } + events = append(events, event) + } + + assert.Truef(t, foundFailureEvent, "Expected event to be generated for cluster deletion failure, got events: %s", strings.Join(events, "\n")) +} + +func TestFailedDeleteRayClusterEvent(t *testing.T) { + newScheme := runtime.NewScheme() + _ = rayv1.AddToScheme(newScheme) + + rayCluster := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-raycluster", + Namespace: "default", + }, + } + + rayJob := &rayv1.RayJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rayjob", + Namespace: "default", + }, + Status: rayv1.RayJobStatus{ + RayClusterName: "test-raycluster", + }, + } + + fakeClient := clientFake.NewClientBuilder().WithInterceptorFuncs(interceptor.Funcs{ + Delete: func(_ context.Context, _ client.WithWatch, _ client.Object, _ ...client.DeleteOption) error { + return errors.New("random") + }, + }).WithScheme(newScheme).WithRuntimeObjects(rayCluster).Build() + + recorder := record.NewFakeRecorder(100) + + reconciler := &RayJobReconciler{ + Client: fakeClient, + Recorder: recorder, + Scheme: scheme.Scheme, + } + + _, err := reconciler.deleteClusterResources(context.Background(), rayJob) + + assert.NotNil(t, err, "Expected error due to cluster deletion failure") + + var foundFailureEvent bool + events := []string{} + for len(recorder.Events) > 0 { + event := <-recorder.Events + if strings.Contains(event, "Failed to delete cluster") { + 
foundFailureEvent = true + break + } + events = append(events, event) + } + + assert.Truef(t, foundFailureEvent, "Expected event to be generated for cluster deletion failure, got events: %s", strings.Join(events, "\n")) } diff --git a/ray-operator/controllers/ray/rayservice_controller.go b/ray-operator/controllers/ray/rayservice_controller.go index e2f45ea9305..2697afdc3a5 100644 --- a/ray-operator/controllers/ray/rayservice_controller.go +++ b/ray-operator/controllers/ray/rayservice_controller.go @@ -2,27 +2,31 @@ package ray import ( "context" + errstd "errors" "fmt" + "math" "os" - "reflect" "strconv" "strings" "time" "k8s.io/apimachinery/pkg/util/json" "k8s.io/apimachinery/pkg/util/yaml" - - networkingv1 "k8s.io/api/networking/v1" + "k8s.io/utils/lru" "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" + "github.com/ray-project/kuberay/ray-operator/pkg/features" cmap "github.com/orcaman/concurrent-map/v2" "github.com/go-logr/logr" - fmtErrors "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -30,8 +34,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" - "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -50,22 +52,25 @@ type RayServiceReconciler struct { client.Client Scheme *runtime.Scheme Recorder record.EventRecorder - // Currently, the Ray dashboard doesn't cache the Serve deployment config. + // Currently, the Ray dashboard doesn't cache the Serve application config. 
// To avoid reapplying the same config repeatedly, cache the config in this map. - ServeConfigs cmap.ConcurrentMap[string, string] + // Cache key is the combination of RayService namespace and name. + // Cache value is map of RayCluster name to Serve application config. + ServeConfigs *lru.Cache RayClusterDeletionTimestamps cmap.ConcurrentMap[string, time.Time] - - dashboardClientFunc func() utils.RayDashboardClientInterface - httpProxyClientFunc func() utils.RayHttpProxyClientInterface + dashboardClientFunc func() utils.RayDashboardClientInterface + httpProxyClientFunc func() utils.RayHttpProxyClientInterface } // NewRayServiceReconciler returns a new reconcile.Reconciler -func NewRayServiceReconciler(ctx context.Context, mgr manager.Manager, dashboardClientFunc func() utils.RayDashboardClientInterface, httpProxyClientFunc func() utils.RayHttpProxyClientInterface) *RayServiceReconciler { +func NewRayServiceReconciler(_ context.Context, mgr manager.Manager, provider utils.ClientProvider) *RayServiceReconciler { + dashboardClientFunc := provider.GetDashboardClient(mgr) + httpProxyClientFunc := provider.GetHttpProxyClient(mgr) return &RayServiceReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Recorder: mgr.GetEventRecorderFor("rayservice-controller"), - ServeConfigs: cmap.New[string](), + ServeConfigs: lru.New(utils.ServeConfigLRUSize), RayClusterDeletionTimestamps: cmap.New[time.Time](), dashboardClientFunc: dashboardClientFunc, @@ -82,13 +87,12 @@ func NewRayServiceReconciler(ctx context.Context, mgr manager.Manager, dashboard // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=pods/status,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=endpoints,verbs=get;list +// 
+kubebuilder:rbac:groups=core,resources=pods/proxy,verbs=get;update;patch +// +kubebuilder:rbac:groups=core,resources=endpoints,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=services/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=core,resources=services/proxy,verbs=get;update;patch // +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update -// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingressclasses,verbs=get;list;watch -// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch;create;update;delete;patch -// +kubebuilder:rbac:groups=extensions,resources=ingresses,verbs=get;list;watch;create;update;delete;patch // +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;delete // +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles,verbs=get;list;watch;create;delete;update // +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=rolebindings,verbs=get;list;watch;create;delete @@ -104,17 +108,21 @@ func NewRayServiceReconciler(ctx context.Context, mgr manager.Manager, dashboard func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) { logger := ctrl.LoggerFrom(ctx) - var isReady bool = false - var rayServiceInstance *rayv1.RayService var err error - var ctrlResult ctrl.Result // Resolve the CR from request. 
if rayServiceInstance, err = r.getRayServiceInstance(ctx, request); err != nil { return ctrl.Result{}, client.IgnoreNotFound(err) } originalRayServiceInstance := rayServiceInstance.DeepCopy() + + if err := validateRayServiceSpec(rayServiceInstance); err != nil { + r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeWarning, string(utils.InvalidRayServiceSpec), + "The RayService spec is invalid %s/%s: %v", rayServiceInstance.Namespace, rayServiceInstance.Name, err) + return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err + } + r.cleanUpServeConfigCache(ctx, rayServiceInstance) // TODO (kevin85421): ObservedGeneration should be used to determine whether to update this CR or not. @@ -124,7 +132,6 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque var activeRayClusterInstance *rayv1.RayCluster var pendingRayClusterInstance *rayv1.RayCluster if activeRayClusterInstance, pendingRayClusterInstance, err = r.reconcileRayCluster(ctx, rayServiceInstance); err != nil { - err = r.updateState(ctx, rayServiceInstance, rayv1.FailedToGetOrCreateRayCluster, err) return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, client.IgnoreNotFound(err) } @@ -139,18 +146,35 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, nil } + if activeRayClusterInstance == nil && pendingRayClusterInstance == nil { + panic("Both active and pending Ray clusters are nil before reconcileServe. " + + "Please open a GitHub issue in the KubeRay repository.") + } + + // Check both active and pending Ray clusters to see if the head Pod is ready to serve requests. + // This is important to ensure the reliability of the serve service because the head Pod cannot + // rely on readiness probes to determine serve readiness. 
+ if err := r.updateHeadPodServeLabel(ctx, activeRayClusterInstance, rayServiceInstance.Spec.ExcludeHeadPodFromServeSvc); err != nil { + return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err + } + if err := r.updateHeadPodServeLabel(ctx, pendingRayClusterInstance, rayServiceInstance.Spec.ExcludeHeadPodFromServeSvc); err != nil { + return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err + } + /* - Update ray cluster for 4 possible situations. - If a ray cluster does not exist, clear its status. - If only one ray cluster exists, do serve deployment if needed and check dashboard, serve deployment health. - If both ray clusters exist, update active cluster status and do the pending cluster deployment and health check. + Update Ray cluster for the following possible situations: + 1. If a Ray cluster does not exist, clear its status. + 2. If only one Ray cluster exists, perform Serve deployment if needed and check Dashboard and Serve deployment health. + 3. If both Ray clusters exist, update active cluster status and perform pending cluster deployment and health check. */ + var isActiveClusterReady, isPendingClusterReady bool = false, false + if activeRayClusterInstance != nil && pendingRayClusterInstance == nil { logger.Info("Reconciling the Serve component. Only the active Ray cluster exists.") rayServiceInstance.Status.PendingServiceStatus = rayv1.RayServiceStatus{} - if ctrlResult, isReady, err = r.reconcileServe(ctx, rayServiceInstance, activeRayClusterInstance, true); err != nil { + if isActiveClusterReady, err = r.reconcileServe(ctx, rayServiceInstance, activeRayClusterInstance, true); err != nil { logger.Error(err, "Fail to reconcileServe.") - return ctrlResult, nil + return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, nil } } else if activeRayClusterInstance != nil && pendingRayClusterInstance != nil { logger.Info("Reconciling the Serve component. 
Active and pending Ray clusters exist.") @@ -159,56 +183,47 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque logger.Error(err, "Failed to update active Ray cluster's status.") } - if ctrlResult, isReady, err = r.reconcileServe(ctx, rayServiceInstance, pendingRayClusterInstance, false); err != nil { + if isPendingClusterReady, err = r.reconcileServe(ctx, rayServiceInstance, pendingRayClusterInstance, false); err != nil { logger.Error(err, "Fail to reconcileServe.") - return ctrlResult, nil + return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, nil } } else if activeRayClusterInstance == nil && pendingRayClusterInstance != nil { rayServiceInstance.Status.ActiveServiceStatus = rayv1.RayServiceStatus{} - if ctrlResult, isReady, err = r.reconcileServe(ctx, rayServiceInstance, pendingRayClusterInstance, false); err != nil { + if isPendingClusterReady, err = r.reconcileServe(ctx, rayServiceInstance, pendingRayClusterInstance, false); err != nil { logger.Error(err, "Fail to reconcileServe.") - return ctrlResult, nil + return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, nil } - } else { - logger.Info("Reconciling the Serve component. No Ray cluster exists.") - rayServiceInstance.Status.ActiveServiceStatus = rayv1.RayServiceStatus{} - rayServiceInstance.Status.PendingServiceStatus = rayv1.RayServiceStatus{} } - if !isReady { - logger.Info(fmt.Sprintf("Ray Serve applications are not ready to serve requests: checking again in %ss", ServiceDefaultRequeueDuration)) - r.Recorder.Eventf(rayServiceInstance, "Normal", "ServiceNotReady", "The service is not ready yet. Controller will perform a round of actions in %s.", ServiceDefaultRequeueDuration) + if !isActiveClusterReady && !isPendingClusterReady { + logger.Info("Ray Serve applications are not ready to serve requests") return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, nil } - // Get the ready Ray cluster instance for service and ingress update. 
+ // Switch pending cluster to active cluster if pending cluster is ready + // to serve requests. + if isPendingClusterReady { + promotePendingClusterToActiveCluster(ctx, rayServiceInstance) + // TODO: update K8s service to switch the RayCluster to pending cluster. + } + + // Get the ready Ray cluster instance for service update. var rayClusterInstance *rayv1.RayCluster if pendingRayClusterInstance != nil { rayClusterInstance = pendingRayClusterInstance - logger.Info("Reconciling the ingress and service resources " + + logger.Info("Reconciling the service resources " + "on the pending Ray cluster.") } else if activeRayClusterInstance != nil { rayClusterInstance = activeRayClusterInstance - logger.Info("Reconciling the ingress and service resources " + + logger.Info("Reconciling the service resources " + "on the active Ray cluster. No pending Ray cluster found.") - } else { - rayClusterInstance = nil - logger.Info("No Ray cluster found. Skipping ingress and service reconciliation.") } - if rayClusterInstance != nil { - if err := r.reconcileServices(ctx, rayServiceInstance, rayClusterInstance, utils.HeadService); err != nil { - err = r.updateState(ctx, rayServiceInstance, rayv1.FailedToUpdateService, err) - return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err - } - if err := r.labelHeadPodForServeStatus(ctx, rayClusterInstance); err != nil { - err = r.updateState(ctx, rayServiceInstance, rayv1.FailedToUpdateServingPodLabel, err) - return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err - } - if err := r.reconcileServices(ctx, rayServiceInstance, rayClusterInstance, utils.ServingService); err != nil { - err = r.updateState(ctx, rayServiceInstance, rayv1.FailedToUpdateService, err) - return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err - } + if err := r.reconcileServices(ctx, rayServiceInstance, rayClusterInstance, utils.HeadService); err != nil { + return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err + } + 
if err := r.reconcileServices(ctx, rayServiceInstance, rayClusterInstance, utils.ServingService); err != nil { + return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err } if err := r.calculateStatus(ctx, rayServiceInstance); err != nil { @@ -216,10 +231,9 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque } // Final status update for any CR modification. - if r.inconsistentRayServiceStatuses(ctx, originalRayServiceInstance.Status, rayServiceInstance.Status) { + if inconsistentRayServiceStatuses(ctx, originalRayServiceInstance.Status, rayServiceInstance.Status) { rayServiceInstance.Status.LastUpdateTime = &metav1.Time{Time: time.Now()} if errStatus := r.Status().Update(ctx, rayServiceInstance); errStatus != nil { - logger.Error(errStatus, "Failed to update RayService status", "rayServiceInstance", rayServiceInstance) return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, errStatus } } @@ -227,11 +241,24 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, nil } +func validateRayServiceSpec(rayService *rayv1.RayService) error { + if headSvc := rayService.Spec.RayClusterSpec.HeadGroupSpec.HeadService; headSvc != nil && headSvc.Name != "" { + return fmt.Errorf("spec.rayClusterConfig.headGroupSpec.headService.metadata.name should not be set") + } + + // only NewCluster and None are valid upgradeType + if rayService.Spec.UpgradeStrategy != nil && + rayService.Spec.UpgradeStrategy.Type != nil && + *rayService.Spec.UpgradeStrategy.Type != rayv1.None && + *rayService.Spec.UpgradeStrategy.Type != rayv1.NewCluster { + return fmt.Errorf("Spec.UpgradeStrategy.Type value %s is invalid, valid options are %s or %s", *rayService.Spec.UpgradeStrategy.Type, rayv1.NewCluster, rayv1.None) + } + return nil +} + func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceInstance *rayv1.RayService) error { - logger := 
ctrl.LoggerFrom(ctx) serveEndPoints := &corev1.Endpoints{} if err := r.Get(ctx, common.RayServiceServeServiceNamespacedName(rayServiceInstance), serveEndPoints); err != nil && !errors.IsNotFound(err) { - logger.Error(err, "Fail to retrieve the Kubernetes Endpoints from the cluster!") return err } @@ -241,7 +268,10 @@ func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceIn for _, subset := range serveEndPoints.Subsets { numServeEndpoints += len(subset.Addresses) } - rayServiceInstance.Status.NumServeEndpoints = int32(numServeEndpoints) + if numServeEndpoints > math.MaxInt32 { + return errstd.New("numServeEndpoints exceeds math.MaxInt32") + } + rayServiceInstance.Status.NumServeEndpoints = int32(numServeEndpoints) //nolint:gosec // This is a false positive from gosec. See https://github.com/securego/gosec/issues/1212 for more details. return nil } @@ -249,10 +279,10 @@ func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceIn // If the only difference between the old and new status is the HealthLastUpdateTime field, // the status update will not be triggered. // The RayClusterStatus field is only for observability in RayService CR, and changes to it will not trigger the status update. 
-func (r *RayServiceReconciler) inconsistentRayServiceStatus(ctx context.Context, oldStatus rayv1.RayServiceStatus, newStatus rayv1.RayServiceStatus) bool { +func inconsistentRayServiceStatus(ctx context.Context, oldStatus rayv1.RayServiceStatus, newStatus rayv1.RayServiceStatus) bool { logger := ctrl.LoggerFrom(ctx) if oldStatus.RayClusterName != newStatus.RayClusterName { - logger.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService RayClusterName changed from %s to %s", oldStatus.RayClusterName, newStatus.RayClusterName)) + logger.Info("inconsistentRayServiceStatus RayService RayClusterName", "oldRayClusterName", oldStatus.RayClusterName, "newRayClusterName", newStatus.RayClusterName) return true } @@ -264,15 +294,15 @@ func (r *RayServiceReconciler) inconsistentRayServiceStatus(ctx context.Context, for appName, newAppStatus := range newStatus.Applications { var oldAppStatus rayv1.AppStatus if oldAppStatus, ok = oldStatus.Applications[appName]; !ok { - logger.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService new application %s found", appName)) + logger.Info("inconsistentRayServiceStatus RayService new application found", "appName", appName) return true } if oldAppStatus.Status != newAppStatus.Status { - logger.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService application %s status changed from %v to %v", appName, oldAppStatus.Status, newAppStatus.Status)) + logger.Info("inconsistentRayServiceStatus RayService application status changed", "appName", appName, "oldStatus", oldAppStatus.Status, "newStatus", newAppStatus.Status) return true } else if oldAppStatus.Message != newAppStatus.Message { - logger.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService application %s status message changed from %v to %v", appName, oldAppStatus.Message, newAppStatus.Message)) + logger.Info("inconsistentRayServiceStatus RayService application status message changed", "appName", appName, "oldStatus", oldAppStatus.Message, "newStatus", 
newAppStatus.Message) return true } @@ -283,15 +313,15 @@ func (r *RayServiceReconciler) inconsistentRayServiceStatus(ctx context.Context, for deploymentName, newDeploymentStatus := range newAppStatus.Deployments { var oldDeploymentStatus rayv1.ServeDeploymentStatus if oldDeploymentStatus, ok = oldAppStatus.Deployments[deploymentName]; !ok { - logger.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService new deployment %s found in application %s", deploymentName, appName)) + logger.Info("inconsistentRayServiceStatus RayService new deployment found in application", "deploymentName", deploymentName, "appName", appName) return true } if oldDeploymentStatus.Status != newDeploymentStatus.Status { - logger.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService DeploymentStatus changed from %v to %v", oldDeploymentStatus.Status, newDeploymentStatus.Status)) + logger.Info("inconsistentRayServiceStatus RayService DeploymentStatus changed", "oldDeploymentStatus", oldDeploymentStatus.Status, "newDeploymentStatus", newDeploymentStatus.Status) return true } else if oldDeploymentStatus.Message != newDeploymentStatus.Message { - logger.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService deployment status message changed from %v to %v", oldDeploymentStatus.Message, newDeploymentStatus.Message)) + logger.Info("inconsistentRayServiceStatus RayService deployment status message changed", "oldDeploymentStatus", oldDeploymentStatus.Message, "newDeploymentStatus", newDeploymentStatus.Message) return true } } @@ -301,24 +331,24 @@ func (r *RayServiceReconciler) inconsistentRayServiceStatus(ctx context.Context, } // Determine whether to update the status of the RayService instance. 
-func (r *RayServiceReconciler) inconsistentRayServiceStatuses(ctx context.Context, oldStatus rayv1.RayServiceStatuses, newStatus rayv1.RayServiceStatuses) bool { +func inconsistentRayServiceStatuses(ctx context.Context, oldStatus rayv1.RayServiceStatuses, newStatus rayv1.RayServiceStatuses) bool { logger := ctrl.LoggerFrom(ctx) if oldStatus.ServiceStatus != newStatus.ServiceStatus { - logger.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService ServiceStatus changed from %s to %s", oldStatus.ServiceStatus, newStatus.ServiceStatus)) + logger.Info("inconsistentRayServiceStatus RayService ServiceStatus changed", "oldServiceStatus", oldStatus.ServiceStatus, "newServiceStatus", newStatus.ServiceStatus) return true } if oldStatus.NumServeEndpoints != newStatus.NumServeEndpoints { - logger.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService NumServeEndpoints changed from %d to %d", oldStatus.NumServeEndpoints, newStatus.NumServeEndpoints)) + logger.Info("inconsistentRayServiceStatus RayService NumServeEndpoints changed", "oldNumServeEndpoints", oldStatus.NumServeEndpoints, "newNumServeEndpoints", newStatus.NumServeEndpoints) return true } - if r.inconsistentRayServiceStatus(ctx, oldStatus.ActiveServiceStatus, newStatus.ActiveServiceStatus) { + if inconsistentRayServiceStatus(ctx, oldStatus.ActiveServiceStatus, newStatus.ActiveServiceStatus) { logger.Info("inconsistentRayServiceStatus RayService ActiveServiceStatus changed") return true } - if r.inconsistentRayServiceStatus(ctx, oldStatus.PendingServiceStatus, newStatus.PendingServiceStatus) { + if inconsistentRayServiceStatus(ctx, oldStatus.PendingServiceStatus, newStatus.PendingServiceStatus) { logger.Info("inconsistentRayServiceStatus RayService PendingServiceStatus changed") return true } @@ -327,7 +357,7 @@ func (r *RayServiceReconciler) inconsistentRayServiceStatuses(ctx context.Contex } // SetupWithManager sets up the controller with the Manager. 
-func (r *RayServiceReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *RayServiceReconciler) SetupWithManager(mgr ctrl.Manager, reconcileConcurrency int) error { return ctrl.NewControllerManagedBy(mgr). For(&rayv1.RayService{}, builder.WithPredicates(predicate.Or( predicate.GenerationChangedPredicate{}, @@ -336,8 +366,8 @@ func (r *RayServiceReconciler) SetupWithManager(mgr ctrl.Manager) error { ))). Owns(&rayv1.RayCluster{}). Owns(&corev1.Service{}). - Owns(&networkingv1.Ingress{}). WithOptions(controller.Options{ + MaxConcurrentReconciles: reconcileConcurrency, LogConstructor: func(request *reconcile.Request) logr.Logger { logger := ctrl.Log.WithName("controllers").WithName("RayService") if request != nil { @@ -363,13 +393,30 @@ func (r *RayServiceReconciler) getRayServiceInstance(ctx context.Context, reques return rayServiceInstance, nil } -func (r *RayServiceReconciler) updateState(ctx context.Context, rayServiceInstance *rayv1.RayService, status rayv1.ServiceStatus, err error) error { - rayServiceInstance.Status.ServiceStatus = status - if errStatus := r.Status().Update(ctx, rayServiceInstance); errStatus != nil { - return fmtErrors.Errorf("combined error: %v %v", err, errStatus) +func isZeroDowntimeUpgradeEnabled(ctx context.Context, rayService *rayv1.RayService) bool { + // For LLM serving, some users might not have sufficient GPU resources to run two RayClusters simultaneously. + // Therefore, KubeRay offers ENABLE_ZERO_DOWNTIME as a feature flag for zero-downtime upgrades. + // There are two ways to enable zero downtime upgrade. Through ENABLE_ZERO_DOWNTIME env var or setting Spec.UpgradeStrategy.Type. + // If no fields are set, zero downtime upgrade by default is enabled. + // Spec.UpgradeStrategy.Type takes precedence over ENABLE_ZERO_DOWNTIME. 
+ logger := ctrl.LoggerFrom(ctx) + upgradeStrategy := rayService.Spec.UpgradeStrategy + if upgradeStrategy != nil { + upgradeType := upgradeStrategy.Type + if upgradeType != nil { + if *upgradeType != rayv1.NewCluster { + logger.Info("Zero-downtime upgrade is disabled because UpgradeStrategy.Type is not set to NewCluster.") + return false + } + return true + } } - r.Recorder.Event(rayServiceInstance, "Normal", string(status), err.Error()) - return err + zeroDowntimeEnvVar := os.Getenv(ENABLE_ZERO_DOWNTIME) + if strings.ToLower(zeroDowntimeEnvVar) == "false" { + logger.Info("Zero-downtime upgrade is disabled because ENABLE_ZERO_DOWNTIME is set to false.") + return false + } + return true } // reconcileRayCluster checks the active and pending ray cluster instances. It includes 3 parts. @@ -394,24 +441,27 @@ func (r *RayServiceReconciler) reconcileRayCluster(ctx context.Context, rayServi return nil, nil, err } - clusterAction := r.shouldPrepareNewRayCluster(ctx, rayServiceInstance, activeRayCluster) - if clusterAction == RolloutNew { - // For LLM serving, some users might not have sufficient GPU resources to run two RayClusters simultaneously. - // Therefore, KubeRay offers ENABLE_ZERO_DOWNTIME as a feature flag for zero-downtime upgrades. 
- enableZeroDowntime := true - if s := os.Getenv(ENABLE_ZERO_DOWNTIME); strings.ToLower(s) == "false" { - enableZeroDowntime = false + clusterAction := decideClusterAction(ctx, rayServiceInstance, activeRayCluster, pendingRayCluster) + switch clusterAction { + case GeneratePendingClusterName: + markPreparingNewCluster(rayServiceInstance) + return activeRayCluster, nil, nil + case CreatePendingCluster: + logger.Info("Creating a new pending RayCluster instance.") + pendingRayCluster, err = r.createRayClusterInstance(ctx, rayServiceInstance) + return activeRayCluster, pendingRayCluster, err + case UpdatePendingCluster: + logger.Info("Updating the pending RayCluster instance.") + pendingRayCluster, err = r.constructRayClusterForRayService(ctx, rayServiceInstance, pendingRayCluster.Name) + if err != nil { + return nil, nil, err } - if enableZeroDowntime || !enableZeroDowntime && activeRayCluster == nil { - // Add a pending cluster name. In the next reconcile loop, shouldPrepareNewRayCluster will return DoNothing and we will - // actually create the pending RayCluster instance. - r.markRestartAndAddPendingClusterName(ctx, rayServiceInstance) - } else { - logger.Info("Zero-downtime upgrade is disabled (ENABLE_ZERO_DOWNTIME: false). Skip preparing a new RayCluster.") + err = r.updateRayClusterInstance(ctx, pendingRayCluster) + if err != nil { + return nil, nil, err } - return activeRayCluster, nil, nil - } else if clusterAction == Update { - // Update the active cluster. 
+ return activeRayCluster, pendingRayCluster, nil + case UpdateActiveCluster: logger.Info("Updating the active RayCluster instance.") if activeRayCluster, err = r.constructRayClusterForRayService(ctx, rayServiceInstance, activeRayCluster.Name); err != nil { return nil, nil, err @@ -420,39 +470,35 @@ func (r *RayServiceReconciler) reconcileRayCluster(ctx context.Context, rayServi return nil, nil, err } return activeRayCluster, nil, nil + case DoNothing: + return activeRayCluster, pendingRayCluster, nil + default: + panic(fmt.Sprintf("Unexpected clusterAction: %v", clusterAction)) } - - if pendingRayCluster, err = r.createRayClusterInstanceIfNeeded(ctx, rayServiceInstance, pendingRayCluster); err != nil { - return nil, nil, err - } - - return activeRayCluster, pendingRayCluster, nil } // cleanUpRayClusterInstance cleans up all the dangling RayCluster instances that are owned by the RayService instance. func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, rayServiceInstance *rayv1.RayService) error { logger := ctrl.LoggerFrom(ctx) rayClusterList := rayv1.RayClusterList{} - filterLabels := client.MatchingLabels{ - utils.RayOriginatedFromCRNameLabelKey: rayServiceInstance.Name, - utils.RayOriginatedFromCRDLabelKey: utils.RayOriginatedFromCRDLabelValue(utils.RayServiceCRD), - } + var err error - if err = r.List(ctx, &rayClusterList, client.InNamespace(rayServiceInstance.Namespace), filterLabels); err != nil { - logger.Error(err, "Fail to list RayCluster for "+rayServiceInstance.Name) + if err = r.List(ctx, &rayClusterList, common.RayServiceRayClustersAssociationOptions(rayServiceInstance).ToListOptions()...); err != nil { return err } // Clean up RayCluster instances. Each instance is deleted 60 seconds - // after becoming inactive to give the ingress time to update. 
for _, rayClusterInstance := range rayClusterList.Items { if rayClusterInstance.Name != rayServiceInstance.Status.ActiveServiceStatus.RayClusterName && rayClusterInstance.Name != rayServiceInstance.Status.PendingServiceStatus.RayClusterName { cachedTimestamp, exists := r.RayClusterDeletionTimestamps.Get(rayClusterInstance.Name) if !exists { deletionTimestamp := metav1.Now().Add(RayClusterDeletionDelayDuration) r.RayClusterDeletionTimestamps.Set(rayClusterInstance.Name, deletionTimestamp) - logger.Info(fmt.Sprintf("Scheduled dangling RayCluster "+ - "%s for deletion at %s", rayClusterInstance.Name, deletionTimestamp)) + logger.Info( + "Scheduled dangling RayCluster for deletion", + "rayClusterName", rayClusterInstance.Name, + "deletionTimestamp", deletionTimestamp, + ) } else { reasonForDeletion := "" if time.Since(cachedTimestamp) > 0*time.Second { @@ -464,7 +510,6 @@ func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, ra if reasonForDeletion != "" { logger.Info("reconcileRayCluster", "delete Ray cluster", rayClusterInstance.Name, "reason", reasonForDeletion) if err := r.Delete(ctx, &rayClusterInstance, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil { - logger.Error(err, "Fail to delete RayCluster "+rayClusterInstance.Name) return err } } @@ -476,164 +521,164 @@ func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, ra } func (r *RayServiceReconciler) getRayClusterByNamespacedName(ctx context.Context, clusterKey client.ObjectKey) (*rayv1.RayCluster, error) { - logger := ctrl.LoggerFrom(ctx) + if clusterKey.Name == "" { + return nil, nil + } + rayCluster := &rayv1.RayCluster{} - if clusterKey.Name != "" { - // Ignore not found since in that case we should return RayCluster as nil. 
- if err := r.Get(ctx, clusterKey, rayCluster); client.IgnoreNotFound(err) != nil { - logger.Error(err, "Fail to get RayCluster "+clusterKey.String()) - return nil, err - } - } else { - rayCluster = nil + if err := r.Get(ctx, clusterKey, rayCluster); client.IgnoreNotFound(err) != nil { + return nil, err } return rayCluster, nil } -// cleanUpServeConfigCache cleans up the unused serve deployments config in the cached map. +// cleanUpServeConfigCache cleans up the unused serve applications config in the cached map. func (r *RayServiceReconciler) cleanUpServeConfigCache(ctx context.Context, rayServiceInstance *rayv1.RayService) { logger := ctrl.LoggerFrom(ctx) - activeConfigKey := r.generateConfigKey(rayServiceInstance, rayServiceInstance.Status.ActiveServiceStatus.RayClusterName) - pendingConfigKey := r.generateConfigKey(rayServiceInstance, rayServiceInstance.Status.PendingServiceStatus.RayClusterName) - configPrefix := r.generateConfigKeyPrefix(rayServiceInstance) + activeRayClusterName := rayServiceInstance.Status.ActiveServiceStatus.RayClusterName + pendingRayClusterName := rayServiceInstance.Status.PendingServiceStatus.RayClusterName - // Clean up RayCluster serve deployment configs. - for key := range r.ServeConfigs.Items() { - if key == activeConfigKey || key == pendingConfigKey { - continue - } - if !strings.HasPrefix(key, configPrefix) { - // Skip configs owned by other RayService Instance. 
+ cacheKey := rayServiceInstance.Namespace + "/" + rayServiceInstance.Name + cacheValue, exist := r.ServeConfigs.Get(cacheKey) + if !exist { + return + } + clusterNameToServeConfig := cacheValue.(cmap.ConcurrentMap[string, string]) + + for key := range clusterNameToServeConfig.Items() { + if key == activeRayClusterName || key == pendingRayClusterName { continue } - logger.Info("cleanUpServeConfigCache", "activeConfigKey", activeConfigKey, "pendingConfigKey", pendingConfigKey, "remove key", key) - r.ServeConfigs.Remove(key) + logger.Info("Remove stale serve application config", "remove key", key, "activeRayClusterName", activeRayClusterName, "pendingRayClusterName", pendingRayClusterName) + clusterNameToServeConfig.Remove(key) } } type ClusterAction int const ( - DoNothing ClusterAction = iota // value 0 - Update // value 1 - RolloutNew // value 2 + DoNothing ClusterAction = iota + UpdateActiveCluster + UpdatePendingCluster + GeneratePendingClusterName + CreatePendingCluster ) -// shouldPrepareNewRayCluster checks if we need to generate a new pending cluster. -func (r *RayServiceReconciler) shouldPrepareNewRayCluster(ctx context.Context, rayServiceInstance *rayv1.RayService, activeRayCluster *rayv1.RayCluster) ClusterAction { +// decideClusterAction decides the action to take for the underlying RayCluster instances. +// Prepare new RayCluster if: +// 1. No active cluster and no pending cluster +// 2. No pending cluster, and the active RayCluster has changed. +func decideClusterAction(ctx context.Context, rayServiceInstance *rayv1.RayService, activeRayCluster, pendingRayCluster *rayv1.RayCluster) ClusterAction { logger := ctrl.LoggerFrom(ctx) - // Prepare new RayCluster if: - // 1. No active cluster and no pending cluster - // 2. No pending cluster, and the active RayCluster has changed. - if rayServiceInstance.Status.PendingServiceStatus.RayClusterName == "" { - if activeRayCluster == nil { - logger.Info("No active Ray cluster. 
RayService operator should prepare a new Ray cluster.") - return RolloutNew - } - // Case 1: If everything is identical except for the Replicas and WorkersToDelete of + // Handle pending RayCluster cases. + if rayServiceInstance.Status.PendingServiceStatus.RayClusterName != "" { + oldSpec := pendingRayCluster.Spec + newSpec := rayServiceInstance.Spec.RayClusterSpec + // If everything is identical except for the Replicas and WorkersToDelete of // each WorkerGroup, then do nothing. - activeClusterHash := activeRayCluster.ObjectMeta.Annotations[utils.HashWithoutReplicasAndWorkersToDeleteKey] - goalClusterHash, err := generateHashWithoutReplicasAndWorkersToDelete(rayServiceInstance.Spec.RayClusterSpec) - errContextFailedToSerialize := "Failed to serialize new RayCluster config. " + - "Manual config updates will NOT be tracked accurately. " + - "Please manually tear down the cluster and apply a new config." - if err != nil { - logger.Error(err, errContextFailedToSerialize) + sameHash, err := compareRayClusterJsonHash(oldSpec, newSpec, generateHashWithoutReplicasAndWorkersToDelete) + if err != nil || sameHash { return DoNothing } - if activeClusterHash == goalClusterHash { - logger.Info("Active Ray cluster config matches goal config. No need to update RayCluster.") - return DoNothing - } + // If everything is identical except for the Replicas and WorkersToDelete of the existing workergroups, + // and one or more new workergroups are added at the end, then update the cluster. + newSpecWithAddedWorkerGroupsStripped := newSpec.DeepCopy() + if len(newSpec.WorkerGroupSpecs) > len(oldSpec.WorkerGroupSpecs) { + // Remove the new worker groups from the new spec. 
+ newSpecWithAddedWorkerGroupsStripped.WorkerGroupSpecs = newSpecWithAddedWorkerGroupsStripped.WorkerGroupSpecs[:len(oldSpec.WorkerGroupSpecs)] - // Case 2: Otherwise, if everything is identical except for the Replicas and WorkersToDelete of - // the existing workergroups, and one or more new workergroups are added at the end, then update the cluster. - activeClusterNumWorkerGroups, err := strconv.Atoi(activeRayCluster.ObjectMeta.Annotations[utils.NumWorkerGroupsKey]) - if err != nil { - logger.Error(err, errContextFailedToSerialize) - return DoNothing - } - goalNumWorkerGroups := len(rayServiceInstance.Spec.RayClusterSpec.WorkerGroupSpecs) - logger.Info("number of worker groups", "activeClusterNumWorkerGroups", activeClusterNumWorkerGroups, "goalNumWorkerGroups", goalNumWorkerGroups) - if goalNumWorkerGroups > activeClusterNumWorkerGroups { - - // Remove the new workergroup(s) from the end before calculating the hash. - goalClusterSpec := rayServiceInstance.Spec.RayClusterSpec.DeepCopy() - goalClusterSpec.WorkerGroupSpecs = goalClusterSpec.WorkerGroupSpecs[:activeClusterNumWorkerGroups] - - // Generate the hash of the old worker group specs. - goalClusterHash, err = generateHashWithoutReplicasAndWorkersToDelete(*goalClusterSpec) + sameHash, err = compareRayClusterJsonHash(oldSpec, *newSpecWithAddedWorkerGroupsStripped, generateHashWithoutReplicasAndWorkersToDelete) if err != nil { - logger.Error(err, errContextFailedToSerialize) return DoNothing } - - if activeClusterHash == goalClusterHash { - logger.Info("Active RayCluster config matches goal config, except that one or more entries were appended to WorkerGroupSpecs. Updating RayCluster.") - return Update + if sameHash { + return UpdatePendingCluster } } - // Case 3: Otherwise, rollout a new cluster. - logger.Info("Active RayCluster config doesn't match goal config. 
" + - "RayService operator should prepare a new Ray cluster.\n" + - "* Active RayCluster config hash: " + activeClusterHash + "\n" + - "* Goal RayCluster config hash: " + goalClusterHash) - return RolloutNew + // Otherwise, create the pending cluster. + return CreatePendingCluster } - return DoNothing -} + if activeRayCluster == nil { + logger.Info("No active Ray cluster. RayService operator should prepare a new Ray cluster.") + return GeneratePendingClusterName + } -// createRayClusterInstanceIfNeeded checks if we need to create a new RayCluster instance. If so, create one. -func (r *RayServiceReconciler) createRayClusterInstanceIfNeeded(ctx context.Context, rayServiceInstance *rayv1.RayService, pendingRayCluster *rayv1.RayCluster) (*rayv1.RayCluster, error) { - logger := ctrl.LoggerFrom(ctx) - // Early return if no pending RayCluster needs to be created. - if rayServiceInstance.Status.PendingServiceStatus.RayClusterName == "" { - return nil, nil + // If the KubeRay version has changed, update the RayCluster to get the cluster hash and new KubeRay version. + activeKubeRayVersion := activeRayCluster.ObjectMeta.Annotations[utils.KubeRayVersion] + if activeKubeRayVersion != utils.KUBERAY_VERSION { + logger.Info("Active RayCluster config doesn't match goal config due to mismatched KubeRay versions. Updating RayCluster.") + return UpdateActiveCluster } - var clusterAction ClusterAction - var err error + // If everything is identical except for the Replicas and WorkersToDelete of + // each WorkerGroup, then do nothing. + activeClusterHash := activeRayCluster.ObjectMeta.Annotations[utils.HashWithoutReplicasAndWorkersToDeleteKey] + goalClusterHash, err := generateHashWithoutReplicasAndWorkersToDelete(rayServiceInstance.Spec.RayClusterSpec) + errContextFailedToSerialize := "Failed to serialize new RayCluster config. " + + "Manual config updates will NOT be tracked accurately. " + + "Please manually tear down the cluster and apply a new config." 
+ if err != nil { + logger.Error(err, errContextFailedToSerialize) + return DoNothing + } - if pendingRayCluster == nil { - clusterAction = RolloutNew - } else { - clusterAction, err = getClusterAction(pendingRayCluster.Spec, rayServiceInstance.Spec.RayClusterSpec) + if activeClusterHash == goalClusterHash { + logger.Info("Active Ray cluster config matches goal config. No need to update RayCluster.") + return DoNothing + } + + // If everything is identical except for the Replicas and WorkersToDelete of + // the existing workergroups, and one or more new workergroups are added at the end, then update the cluster. + activeClusterNumWorkerGroups, err := strconv.Atoi(activeRayCluster.ObjectMeta.Annotations[utils.NumWorkerGroupsKey]) + if err != nil { + logger.Error(err, errContextFailedToSerialize) + return DoNothing + } + goalNumWorkerGroups := len(rayServiceInstance.Spec.RayClusterSpec.WorkerGroupSpecs) + logger.Info("number of worker groups", "activeClusterNumWorkerGroups", activeClusterNumWorkerGroups, "goalNumWorkerGroups", goalNumWorkerGroups) + if goalNumWorkerGroups > activeClusterNumWorkerGroups { + + // Remove the new workergroup(s) from the end before calculating the hash. + goalClusterSpec := rayServiceInstance.Spec.RayClusterSpec.DeepCopy() + goalClusterSpec.WorkerGroupSpecs = goalClusterSpec.WorkerGroupSpecs[:activeClusterNumWorkerGroups] + + // Generate the hash of the old worker group specs. 
+ goalClusterHash, err = generateHashWithoutReplicasAndWorkersToDelete(*goalClusterSpec) if err != nil { - logger.Error(err, "Fail to generate hash for RayClusterSpec") - return nil, err + logger.Error(err, errContextFailedToSerialize) + return DoNothing } - } - switch clusterAction { - case RolloutNew: - logger.Info("Creating a new pending RayCluster instance.") - pendingRayCluster, err = r.createRayClusterInstance(ctx, rayServiceInstance) - case Update: - logger.Info("Updating the pending RayCluster instance.") - if pendingRayCluster, err = r.constructRayClusterForRayService(ctx, rayServiceInstance, pendingRayCluster.Name); err != nil { - return nil, err + if activeClusterHash == goalClusterHash { + logger.Info("Active RayCluster config matches goal config, except that one or more entries were appended to WorkerGroupSpecs. Updating RayCluster.") + return UpdateActiveCluster } - err = r.updateRayClusterInstance(ctx, pendingRayCluster) } - if err != nil { - return nil, err + // Otherwise, rollout a new cluster if zero-downtime upgrade is enabled. + if isZeroDowntimeUpgradeEnabled(ctx, rayServiceInstance) { + logger.Info( + "Active RayCluster config doesn't match goal config. "+ + "RayService operator should prepare a new Ray cluster.", + "activeClusterConfigHash", activeClusterHash, + "goalClusterConfigHash", goalClusterHash, + ) + return GeneratePendingClusterName } - return pendingRayCluster, nil + logger.Info("Zero-downtime upgrade is disabled. Skip preparing a new RayCluster.") + return DoNothing } // updateRayClusterInstance updates the RayCluster instance. func (r *RayServiceReconciler) updateRayClusterInstance(ctx context.Context, rayClusterInstance *rayv1.RayCluster) error { logger := ctrl.LoggerFrom(ctx) logger.Info("updateRayClusterInstance", "Name", rayClusterInstance.Name, "Namespace", rayClusterInstance.Namespace) - // Printing the whole RayCluster is too noisy. Only print the spec. 
- logger.Info("updateRayClusterInstance", "rayClusterInstance.Spec", rayClusterInstance.Spec) // Fetch the current state of the RayCluster currentRayCluster, err := r.getRayClusterByNamespacedName(ctx, client.ObjectKey{ @@ -641,7 +686,7 @@ func (r *RayServiceReconciler) updateRayClusterInstance(ctx context.Context, ray Name: rayClusterInstance.Name, }) if err != nil { - logger.Error(err, "Failed to get the current state of RayCluster", "Namespace", rayClusterInstance.Namespace, "Name", rayClusterInstance.Name) + err = fmt.Errorf("failed to get the current state of RayCluster, namespace: %s, name: %s: %w", rayClusterInstance.Namespace, rayClusterInstance.Name, err) return err } @@ -658,13 +703,7 @@ func (r *RayServiceReconciler) updateRayClusterInstance(ctx context.Context, ray currentRayCluster.Annotations = rayClusterInstance.Annotations // Update the RayCluster - if err = r.Update(ctx, currentRayCluster); err != nil { - logger.Error(err, "Fail to update RayCluster "+currentRayCluster.Name) - return err - } - - logger.Info("updated RayCluster", "rayClusterInstance", currentRayCluster) - return nil + return r.Update(ctx, currentRayCluster) } // createRayClusterInstance deletes the old RayCluster instance if exists. Only when no existing RayCluster, create a new RayCluster instance. @@ -693,7 +732,6 @@ func (r *RayServiceReconciler) createRayClusterInstance(ctx context.Context, ray } // if error is `not found`, then continue. } else if !errors.IsNotFound(err) { - logger.Error(err, "Get request rayCluster instance error!") return nil, err // if error is `not found`, then continue. 
} @@ -701,11 +739,9 @@ func (r *RayServiceReconciler) createRayClusterInstance(ctx context.Context, ray logger.Info("No pending RayCluster, creating RayCluster.") rayClusterInstance, err = r.constructRayClusterForRayService(ctx, rayServiceInstance, rayClusterKey.Name) if err != nil { - logger.Error(err, "unable to construct rayCluster from spec") return nil, err } if err = r.Create(ctx, rayClusterInstance); err != nil { - logger.Error(err, "unable to create rayCluster for rayService", "rayCluster", rayClusterInstance) return nil, err } logger.Info("created rayCluster for rayService", "rayCluster", rayClusterInstance) @@ -738,6 +774,9 @@ func (r *RayServiceReconciler) constructRayClusterForRayService(ctx context.Cont } rayClusterAnnotations[utils.NumWorkerGroupsKey] = strconv.Itoa(len(rayService.Spec.RayClusterSpec.WorkerGroupSpecs)) + // set the KubeRay version used to create the RayCluster + rayClusterAnnotations[utils.KubeRayVersion] = utils.KUBERAY_VERSION + rayCluster := &rayv1.RayCluster{ ObjectMeta: metav1.ObjectMeta{ Labels: rayClusterLabel, @@ -760,32 +799,28 @@ func (r *RayServiceReconciler) checkIfNeedSubmitServeDeployment(ctx context.Cont logger := ctrl.LoggerFrom(ctx) // If the Serve config has not been cached, update the Serve config. - cacheKey := r.generateConfigKey(rayServiceInstance, rayClusterInstance.Name) - cachedServeConfigV2, exist := r.ServeConfigs.Get(cacheKey) - - if !exist { - logger.Info("shouldUpdate", - "shouldUpdateServe", - true, - "reason", - fmt.Sprintf( - "Nothing has been cached for cluster %s with key %s", - rayClusterInstance.Name, - cacheKey, - ), + cachedServeConfigV2 := r.getServeConfigFromCache(rayServiceInstance, rayClusterInstance.Name) + if cachedServeConfigV2 == "" { + logger.Info( + "shouldUpdate", + "shouldUpdateServe", true, + "reason", "Nothing has been cached for the cluster", + "rayClusterName", rayClusterInstance.Name, ) return true } // Handle the case that the head Pod has crashed and GCS FT is not enabled. 
if len(serveStatus.Applications) == 0 { - logger.Info("shouldUpdate", "should create Serve applications", true, + logger.Info( + "shouldUpdate", + "should create Serve applications", true, "reason", - fmt.Sprintf( - "No Serve application found in RayCluster %s, need to create serve applications. "+ - "A possible reason is the head Pod has crashed and GCS FT is not enabled. "+ - "Hence, the RayService CR's Serve application status is set to empty in the previous reconcile.", - rayClusterInstance.Name)) + "No Serve application found in the RayCluster, need to create serve applications. "+ + "A possible reason is the head Pod has crashed and GCS FT is not enabled. "+ + "Hence, the RayService CR's Serve application status is set to empty in the previous reconcile.", + "rayClusterName", rayClusterInstance.Name, + ) return true } @@ -796,7 +831,7 @@ func (r *RayServiceReconciler) checkIfNeedSubmitServeDeployment(ctx context.Cont if cachedServeConfigV2 != rayServiceInstance.Spec.ServeConfigV2 { shouldUpdate = true - reason = fmt.Sprintf("Current V2 Serve config doesn't match cached Serve config for cluster %s with key %s", rayClusterInstance.Name, cacheKey) + reason = fmt.Sprintf("Current V2 Serve config doesn't match cached Serve config for cluster %s", rayClusterInstance.Name) } logger.Info("shouldUpdate", "shouldUpdateServe", shouldUpdate, "reason", reason, "cachedServeConfig", cachedServeConfigV2, "current Serve config", rayServiceInstance.Spec.ServeConfigV2) @@ -814,21 +849,20 @@ func (r *RayServiceReconciler) updateServeDeployment(ctx context.Context, raySer configJson, err := json.Marshal(serveConfig) if err != nil { - return fmt.Errorf("Failed to marshal converted serve config into bytes: %v", err) + return fmt.Errorf("failed to marshal converted serve config into bytes: %w", err) } logger.Info("updateServeDeployment", "MULTI_APP json config", string(configJson)) if err := rayDashboardClient.UpdateDeployments(ctx, configJson); err != nil { err = fmt.Errorf( - 
"Fail to create / update Serve applications. If you observe this error consistently, "+ + "fail to create / update Serve applications. If you observe this error consistently, "+ "please check \"Issue 5: Fail to create / update Serve applications.\" in "+ "https://docs.ray.io/en/master/cluster/kubernetes/troubleshooting/rayservice-troubleshooting.html#kuberay-raysvc-troubleshoot for more details. "+ "err: %v", err) return err } - cacheKey := r.generateConfigKey(rayServiceInstance, clusterName) - r.ServeConfigs.Set(cacheKey, rayServiceInstance.Spec.ServeConfigV2) - logger.Info("updateServeDeployment", "message", fmt.Sprintf("Cached Serve config for Ray cluster %s with key %s", clusterName, cacheKey)) + r.cacheServeConfig(rayServiceInstance, clusterName) + logger.Info("updateServeDeployment", "message", "Cached Serve config for Ray cluster with the key", "rayClusterName", clusterName) return nil } @@ -838,14 +872,14 @@ func (r *RayServiceReconciler) updateServeDeployment(ctx context.Context, raySer // (1) `isReady` is used to determine whether the Serve applications in the RayCluster are ready to serve incoming traffic or not. // (2) `err`: If `err` is not nil, it means that KubeRay failed to get Serve application statuses from the dashboard. We should take a look at dashboard rather than Ray Serve applications. -func (r *RayServiceReconciler) getAndCheckServeStatus(ctx context.Context, dashboardClient utils.RayDashboardClientInterface, rayServiceServeStatus *rayv1.RayServiceStatus) (bool, error) { +func getAndCheckServeStatus(ctx context.Context, dashboardClient utils.RayDashboardClientInterface, rayServiceServeStatus *rayv1.RayServiceStatus) (bool, error) { logger := ctrl.LoggerFrom(ctx) var serveAppStatuses map[string]*utils.ServeApplicationStatus var err error if serveAppStatuses, err = dashboardClient.GetMultiApplicationStatus(ctx); err != nil { err = fmt.Errorf( - "Failed to get Serve application statuses from the dashboard. 
"+ - "If you observe this error consistently, please check https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayservice-troubleshooting.md for more details. "+ + "failed to get Serve application statuses from the dashboard. "+ + "If you observe this error consistently, please check https://docs.ray.io/en/latest/cluster/kubernetes/troubleshooting/rayservice-troubleshooting.html for more details. "+ "err: %v", err) return false, err } @@ -875,9 +909,9 @@ func (r *RayServiceReconciler) getAndCheckServeStatus(ctx context.Context, dashb if prevApplicationStatus.HealthLastUpdateTime != nil { applicationStatus.HealthLastUpdateTime = prevApplicationStatus.HealthLastUpdateTime logger.Info("Ray Serve application is unhealthy", "appName", appName, "detail", - fmt.Sprintf( - "The status of the serve application %s has been UNHEALTHY or DEPLOY_FAILED since %v. ", - appName, prevApplicationStatus.HealthLastUpdateTime)) + "The status of the serve application has been UNHEALTHY or DEPLOY_FAILED since last updated.", + "appName", appName, + "healthLastUpdateTime", prevApplicationStatus.HealthLastUpdateTime) } } } @@ -918,39 +952,62 @@ func (r *RayServiceReconciler) getAndCheckServeStatus(ctx context.Context, dashb return isReady, nil } -func (r *RayServiceReconciler) generateConfigKey(rayServiceInstance *rayv1.RayService, clusterName string) string { - return r.generateConfigKeyPrefix(rayServiceInstance) + clusterName +func (r *RayServiceReconciler) getServeConfigFromCache(rayServiceInstance *rayv1.RayService, clusterName string) string { + cacheKey := rayServiceInstance.Namespace + "/" + rayServiceInstance.Name + cacheValue, exist := r.ServeConfigs.Get(cacheKey) + if !exist { + return "" + } + serveConfigs := cacheValue.(cmap.ConcurrentMap[string, string]) + serveConfig, exist := serveConfigs.Get(clusterName) + if !exist { + return "" + } + return serveConfig } -func (r *RayServiceReconciler) generateConfigKeyPrefix(rayServiceInstance *rayv1.RayService) string { - 
return rayServiceInstance.Namespace + "/" + rayServiceInstance.Name + "/" +func (r *RayServiceReconciler) cacheServeConfig(rayServiceInstance *rayv1.RayService, clusterName string) { + serveConfig := rayServiceInstance.Spec.ServeConfigV2 + if serveConfig == "" { + return + } + cacheKey := rayServiceInstance.Namespace + "/" + rayServiceInstance.Name + cacheValue, exist := r.ServeConfigs.Get(cacheKey) + var rayServiceServeConfigs cmap.ConcurrentMap[string, string] + if !exist { + rayServiceServeConfigs = cmap.New[string]() + r.ServeConfigs.Add(cacheKey, rayServiceServeConfigs) + } else { + rayServiceServeConfigs = cacheValue.(cmap.ConcurrentMap[string, string]) + } + rayServiceServeConfigs.Set(clusterName, serveConfig) } -func (r *RayServiceReconciler) markRestartAndAddPendingClusterName(ctx context.Context, rayServiceInstance *rayv1.RayService) { - logger := ctrl.LoggerFrom(ctx) - - // Generate RayCluster name for pending cluster. - logger.Info("Current cluster is unhealthy, prepare to restart.", "Status", rayServiceInstance.Status) - rayServiceInstance.Status.ServiceStatus = rayv1.Restarting +func markPreparingNewCluster(rayServiceInstance *rayv1.RayService) { + rayServiceInstance.Status.ServiceStatus = rayv1.PreparingNewCluster rayServiceInstance.Status.PendingServiceStatus = rayv1.RayServiceStatus{ RayClusterName: utils.GenerateRayClusterName(rayServiceInstance.Name), } } -func (r *RayServiceReconciler) updateRayClusterInfo(ctx context.Context, rayServiceInstance *rayv1.RayService, healthyClusterName string) { +func promotePendingClusterToActiveCluster(ctx context.Context, rayServiceInstance *rayv1.RayService) { + // Switch the pending cluster to active cluster if needed. Note that this function + // is called when the pending cluster is ready. 
logger := ctrl.LoggerFrom(ctx) - logger.Info("updateRayClusterInfo", "ActiveRayClusterName", rayServiceInstance.Status.ActiveServiceStatus.RayClusterName, "healthyClusterName", healthyClusterName) - if rayServiceInstance.Status.ActiveServiceStatus.RayClusterName != healthyClusterName { - rayServiceInstance.Status.ActiveServiceStatus = rayServiceInstance.Status.PendingServiceStatus - rayServiceInstance.Status.PendingServiceStatus = rayv1.RayServiceStatus{} - } + + oldClusterName := rayServiceInstance.Status.ActiveServiceStatus.RayClusterName + newClusterName := rayServiceInstance.Status.PendingServiceStatus.RayClusterName + + logger.Info("Switch over to the new cluster", "OldRayClusterName", oldClusterName, "NewClusterName", newClusterName) + rayServiceInstance.Status.ActiveServiceStatus = rayServiceInstance.Status.PendingServiceStatus + rayServiceInstance.Status.PendingServiceStatus = rayv1.RayServiceStatus{} + rayServiceInstance.Status.ServiceStatus = rayv1.Running } func (r *RayServiceReconciler) reconcileServices(ctx context.Context, rayServiceInstance *rayv1.RayService, rayClusterInstance *rayv1.RayCluster, serviceType utils.ServiceType) error { logger := ctrl.LoggerFrom(ctx) logger.Info( "reconcileServices", "serviceType", serviceType, - "RayService name", rayServiceInstance.Name, "RayService namespace", rayServiceInstance.Namespace, ) var newSvc *corev1.Service @@ -962,13 +1019,12 @@ func (r *RayServiceReconciler) reconcileServices(ctx context.Context, rayService case utils.ServingService: newSvc, err = common.BuildServeServiceForRayService(ctx, *rayServiceInstance, *rayClusterInstance) default: - return fmt.Errorf("unknown service type %v", serviceType) + panic(fmt.Sprintf("unknown service type %v. This should never happen. Please open an issue in the KubeRay repository.", serviceType)) } if err != nil { return err } - logger.Info("reconcileServices", "newSvc", newSvc) // Retrieve the Service from the Kubernetes cluster with the name and namespace. 
oldSvc := &corev1.Service{} @@ -977,26 +1033,21 @@ func (r *RayServiceReconciler) reconcileServices(ctx context.Context, rayService if err == nil { // Only update the service if the RayCluster switches. if newSvc.Spec.Selector[utils.RayClusterLabelKey] == oldSvc.Spec.Selector[utils.RayClusterLabelKey] { - logger.Info(fmt.Sprintf("RayCluster %v's %v has already exists, skip Update", newSvc.Spec.Selector[utils.RayClusterLabelKey], serviceType)) + logger.Info("Service has already exists in the RayCluster, skip Update", "rayCluster", newSvc.Spec.Selector[utils.RayClusterLabelKey], "serviceType", serviceType) return nil } // ClusterIP is immutable. Starting from Kubernetes v1.21.5, if the new service does not specify a ClusterIP, // Kubernetes will assign the ClusterIP of the old service to the new one. However, to maintain compatibility // with older versions of Kubernetes, we need to assign the ClusterIP here. - if newSvc.Spec.ClusterIP == "" { - newSvc.Spec.ClusterIP = oldSvc.Spec.ClusterIP - } - - // TODO (kevin85421): Consider not only the updates of the Spec but also the ObjectMeta. 
+ newSvc.Spec.ClusterIP = oldSvc.Spec.ClusterIP oldSvc.Spec = *newSvc.Spec.DeepCopy() - logger.Info(fmt.Sprintf("Update Kubernetes Service serviceType %v", serviceType)) + logger.Info("Update Kubernetes Service", "serviceType", serviceType) if updateErr := r.Update(ctx, oldSvc); updateErr != nil { - logger.Error(updateErr, fmt.Sprintf("Fail to update Kubernetes Service serviceType %v", serviceType), "Error", updateErr) return updateErr } } else if errors.IsNotFound(err) { - logger.Info(fmt.Sprintf("Create a Kubernetes Service for RayService serviceType %v", serviceType)) + logger.Info("Create a Kubernetes Service", "serviceType", serviceType) if err := ctrl.SetControllerReference(rayServiceInstance, newSvc, r.Scheme); err != nil { return err } @@ -1005,11 +1056,9 @@ func (r *RayServiceReconciler) reconcileServices(ctx context.Context, rayService logger.Info("The Kubernetes Service already exists, no need to create.") return nil } - logger.Error(createErr, fmt.Sprintf("Fail to create Kubernetes Service serviceType %v", serviceType), "Error", createErr) return createErr } } else { - logger.Error(err, "Fail to retrieve the Kubernetes Service from the cluster!") return err } @@ -1029,10 +1078,12 @@ func (r *RayServiceReconciler) updateStatusForActiveCluster(ctx context.Context, } rayDashboardClient := r.dashboardClientFunc() - rayDashboardClient.InitClient(clientURL) + if err := rayDashboardClient.InitClient(ctx, clientURL, rayClusterInstance); err != nil { + return err + } var isReady bool - if isReady, err = r.getAndCheckServeStatus(ctx, rayDashboardClient, rayServiceStatus); err != nil { + if isReady, err = getAndCheckServeStatus(ctx, rayDashboardClient, rayServiceStatus); err != nil { return err } @@ -1041,9 +1092,9 @@ func (r *RayServiceReconciler) updateStatusForActiveCluster(ctx context.Context, return err } -// Reconciles the Serve applications on the RayCluster. Returns (ctrl.Result, isReady, error). +// Reconciles the Serve applications on the RayCluster. 
Returns (isReady, error). // The `isReady` flag indicates whether the RayCluster is ready to handle incoming traffic. -func (r *RayServiceReconciler) reconcileServe(ctx context.Context, rayServiceInstance *rayv1.RayService, rayClusterInstance *rayv1.RayCluster, isActive bool) (ctrl.Result, bool, error) { +func (r *RayServiceReconciler) reconcileServe(ctx context.Context, rayServiceInstance *rayv1.RayService, rayClusterInstance *rayv1.RayCluster, isActive bool) (bool, error) { logger := ctrl.LoggerFrom(ctx) rayServiceInstance.Status.ActiveServiceStatus.RayClusterStatus = rayClusterInstance.Status var err error @@ -1064,89 +1115,100 @@ func (r *RayServiceReconciler) reconcileServe(ctx context.Context, rayServiceIns // after the head pod is running and ready. Hence, some requests to the Dashboard (e.g. `UpdateDeployments`) may fail. // This is not an issue since `UpdateDeployments` is an idempotent operation. logger.Info("Check the head Pod status of the pending RayCluster", "RayCluster name", rayClusterInstance.Name) - if isRunningAndReady, err := r.isHeadPodRunningAndReady(ctx, rayClusterInstance); err != nil || !isRunningAndReady { - if err != nil { - logger.Error(err, "Failed to check if head Pod is running and ready!") - } else { - logger.Info("Skipping the update of Serve deployments because the Ray head Pod is not ready.") + + // check the latest condition of the head Pod to see if it is ready. 
+ if features.Enabled(features.RayClusterStatusConditions) { + if !meta.IsStatusConditionTrue(rayClusterInstance.Status.Conditions, string(rayv1.HeadPodReady)) { + logger.Info("The head Pod is not ready, requeue the resource event to avoid redundant custom resource status updates.") + return false, nil + } + } else { + if isRunningAndReady, err := r.isHeadPodRunningAndReady(ctx, rayClusterInstance); err != nil || !isRunningAndReady { + if err != nil { + logger.Error(err, "Failed to check if head Pod is running and ready!") + } else { + logger.Info("Skipping the update of Serve applications because the Ray head Pod is not ready.") + } + return false, err } - return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, false, err } - // TODO(architkulkarni): Check the RayVersion. If < 2.8.0, error. - if clientURL, err = utils.FetchHeadServiceURL(ctx, r.Client, rayClusterInstance, utils.DashboardPortName); err != nil || clientURL == "" { - return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, false, err + return false, err } + rayDashboardClient := r.dashboardClientFunc() - rayDashboardClient.InitClient(clientURL) + if err := rayDashboardClient.InitClient(ctx, clientURL, rayClusterInstance); err != nil { + return false, err + } shouldUpdate := r.checkIfNeedSubmitServeDeployment(ctx, rayServiceInstance, rayClusterInstance, rayServiceStatus) if shouldUpdate { if err = r.updateServeDeployment(ctx, rayServiceInstance, rayDashboardClient, rayClusterInstance.Name); err != nil { - err = r.updateState(ctx, rayServiceInstance, rayv1.WaitForServeDeploymentReady, err) - return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, false, err + return false, err } - - r.Recorder.Eventf(rayServiceInstance, "Normal", "SubmittedServeDeployment", - "Controller sent API request to update Serve deployments on cluster %s", rayClusterInstance.Name) } var isReady bool - if isReady, err = r.getAndCheckServeStatus(ctx, rayDashboardClient, rayServiceStatus); err != nil { - 
err = r.updateState(ctx, rayServiceInstance, rayv1.FailedToGetServeDeploymentStatus, err) - return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, false, err + if isReady, err = getAndCheckServeStatus(ctx, rayDashboardClient, rayServiceStatus); err != nil { + return false, err } logger.Info("Check serve health", "isReady", isReady, "isActive", isActive) - if isReady { - rayServiceInstance.Status.ServiceStatus = rayv1.Running - r.updateRayClusterInfo(ctx, rayServiceInstance, rayClusterInstance.Name) - r.Recorder.Event(rayServiceInstance, "Normal", "Running", "The Serve applicaton is now running and healthy.") - } else { + if !isReady { + // TODO (kevin85421): avoid always updating status if the serve applications are not ready. rayServiceInstance.Status.ServiceStatus = rayv1.WaitForServeDeploymentReady if err := r.Status().Update(ctx, rayServiceInstance); err != nil { - return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, false, err + return false, err } - logger.Info("Mark cluster as waiting for Serve deployments", "rayCluster", rayClusterInstance) + logger.Info("Mark cluster as waiting for Serve applications", "rayCluster", rayClusterInstance) } - return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, isReady, nil + return isReady, nil } -func (r *RayServiceReconciler) labelHeadPodForServeStatus(ctx context.Context, rayClusterInstance *rayv1.RayCluster) error { - logger := ctrl.LoggerFrom(ctx) - headPod, err := r.getHeadPod(ctx, rayClusterInstance) +func (r *RayServiceReconciler) updateHeadPodServeLabel(ctx context.Context, rayClusterInstance *rayv1.RayCluster, excludeHeadPodFromServeSvc bool) error { + // `updateHeadPodServeLabel` updates the head Pod's serve label based on the health status of the proxy actor. + // If `excludeHeadPodFromServeSvc` is true, the head Pod will not be used to serve requests, regardless of proxy actor health. 
+ // If `excludeHeadPodFromServeSvc` is false, the head Pod's serve label will be set based on the health check result. + // The label is used by the Kubernetes serve service to determine whether to include the head Pod in the service endpoints. + if rayClusterInstance == nil { + return nil + } + + headPod, err := common.GetRayClusterHeadPod(ctx, r, rayClusterInstance) if err != nil { return err } + if headPod == nil { + return fmt.Errorf("found 0 head. cluster name %s, namespace %v", rayClusterInstance.Name, rayClusterInstance.Namespace) + } - httpProxyClient := r.httpProxyClientFunc() - httpProxyClient.InitClient() + client := r.httpProxyClientFunc() + client.InitClient() rayContainer := headPod.Spec.Containers[utils.RayContainerIndex] servingPort := utils.FindContainerPort(&rayContainer, utils.ServingPortName, utils.DefaultServingPort) - httpProxyClient.SetHostIp(headPod.Status.PodIP, servingPort) + client.SetHostIp(headPod.Status.PodIP, headPod.Namespace, headPod.Name, servingPort) + if headPod.Labels == nil { headPod.Labels = make(map[string]string) } + oldLabel := headPod.Labels[utils.RayClusterServingServiceLabelKey] + newLabel := utils.EnableRayClusterServingServiceFalse - // Make a copy of the labels for comparison later, to decide whether we need to push an update. - originalLabels := make(map[string]string, len(headPod.Labels)) - for key, value := range headPod.Labels { - originalLabels[key] = value - } - - if httpProxyClient.CheckHealth() == nil { - headPod.Labels[utils.RayClusterServingServiceLabelKey] = utils.EnableRayClusterServingServiceTrue - } else { - headPod.Labels[utils.RayClusterServingServiceLabelKey] = utils.EnableRayClusterServingServiceFalse + // If excludeHeadPodFromServeSvc is true, head Pod will not be used to serve requests + // no matter whether the proxy actor is healthy or not. Therefore, only send the health + // check request if excludeHeadPodFromServeSvc is false. 
+ if !excludeHeadPodFromServeSvc { + isHealthy := client.CheckProxyActorHealth(ctx) == nil + newLabel = strconv.FormatBool(isHealthy) } - if !reflect.DeepEqual(originalLabels, headPod.Labels) { + if oldLabel != newLabel { + headPod.Labels[utils.RayClusterServingServiceLabelKey] = newLabel if updateErr := r.Update(ctx, headPod); updateErr != nil { - logger.Error(updateErr, "Pod label Update error!", "Pod.Error", updateErr) return updateErr } } @@ -1154,45 +1216,14 @@ func (r *RayServiceReconciler) labelHeadPodForServeStatus(ctx context.Context, r return nil } -func getClusterAction(oldSpec rayv1.RayClusterSpec, newSpec rayv1.RayClusterSpec) (ClusterAction, error) { - // Return the appropriate action based on the difference in the old and new RayCluster specs. - - // Case 1: If everything is identical except for the Replicas and WorkersToDelete of - // each WorkerGroup, then do nothing. - sameHash, err := compareRayClusterJsonHash(oldSpec, newSpec, generateHashWithoutReplicasAndWorkersToDelete) - if err != nil { - return DoNothing, err - } - if sameHash { - return DoNothing, nil - } - - // Case 2: Otherwise, if everything is identical except for the Replicas and WorkersToDelete of - // the existing workergroups, and one or more new workergroups are added at the end, then update the cluster. - newSpecWithoutWorkerGroups := newSpec.DeepCopy() - if len(newSpec.WorkerGroupSpecs) > len(oldSpec.WorkerGroupSpecs) { - // Remove the new worker groups from the new spec. - newSpecWithoutWorkerGroups.WorkerGroupSpecs = newSpecWithoutWorkerGroups.WorkerGroupSpecs[:len(oldSpec.WorkerGroupSpecs)] - - sameHash, err = compareRayClusterJsonHash(oldSpec, *newSpecWithoutWorkerGroups, generateHashWithoutReplicasAndWorkersToDelete) - if err != nil { - return DoNothing, err - } - if sameHash { - return Update, nil - } - } - - // Case 3: Otherwise, rollout a new cluster. 
- return RolloutNew, nil -} - func generateHashWithoutReplicasAndWorkersToDelete(rayClusterSpec rayv1.RayClusterSpec) (string, error) { // Mute certain fields that will not trigger new RayCluster preparation. For example, // Autoscaler will update `Replicas` and `WorkersToDelete` when scaling up/down. updatedRayClusterSpec := rayClusterSpec.DeepCopy() for i := 0; i < len(updatedRayClusterSpec.WorkerGroupSpecs); i++ { updatedRayClusterSpec.WorkerGroupSpecs[i].Replicas = nil + updatedRayClusterSpec.WorkerGroupSpecs[i].MaxReplicas = nil + updatedRayClusterSpec.WorkerGroupSpecs[i].MinReplicas = nil updatedRayClusterSpec.WorkerGroupSpecs[i].ScaleStrategy.WorkersToDelete = nil } @@ -1215,29 +1246,16 @@ func compareRayClusterJsonHash(spec1 rayv1.RayClusterSpec, spec2 rayv1.RayCluste // isHeadPodRunningAndReady checks if the head pod of the RayCluster is running and ready. func (r *RayServiceReconciler) isHeadPodRunningAndReady(ctx context.Context, instance *rayv1.RayCluster) (bool, error) { - headPod, err := r.getHeadPod(ctx, instance) + headPod, err := common.GetRayClusterHeadPod(ctx, r, instance) if err != nil { return false, err } + if headPod == nil { + return false, fmt.Errorf("found 0 head. cluster name %s, namespace %v", instance.Name, instance.Namespace) + } return utils.IsRunningAndReady(headPod), nil } func isServeAppUnhealthyOrDeployedFailed(appStatus string) bool { return appStatus == rayv1.ApplicationStatusEnum.UNHEALTHY || appStatus == rayv1.ApplicationStatusEnum.DEPLOY_FAILED } - -// TODO: Move this function to util.go and always use this function to retrieve the head Pod. 
-func (r *RayServiceReconciler) getHeadPod(ctx context.Context, instance *rayv1.RayCluster) (*corev1.Pod, error) { - podList := corev1.PodList{} - filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name, utils.RayNodeTypeLabelKey: string(rayv1.HeadNode)} - - if err := r.List(ctx, &podList, client.InNamespace(instance.Namespace), filterLabels); err != nil { - return nil, err - } - - if len(podList.Items) != 1 { - return nil, fmt.Errorf("Found %d head pods for RayCluster %s in the namespace %s", len(podList.Items), instance.Name, instance.Namespace) - } - - return &podList.Items[0], nil -} diff --git a/ray-operator/controllers/ray/rayservice_controller_test.go b/ray-operator/controllers/ray/rayservice_controller_test.go index 8bd4eb32584..2a3bfc3a5a3 100644 --- a/ray-operator/controllers/ray/rayservice_controller_test.go +++ b/ray-operator/controllers/ray/rayservice_controller_test.go @@ -21,16 +21,20 @@ import ( "os" "time" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" + "github.com/ray-project/kuberay/ray-operator/test/support" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/utils/ptr" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/utils/pointer" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/util/retry" @@ -78,7 +82,7 @@ var _ = Context("Inside the default namespace", func() { Spec: rayv1.RayServiceSpec{ ServeConfigV2: testServeConfigV2, RayClusterSpec: rayv1.RayClusterSpec{ - RayVersion: "2.9.0", + RayVersion: support.GetRayVersion(), HeadGroupSpec: rayv1.HeadGroupSpec{ RayStartParams: map[string]string{ "port": "6379", @@ -101,7 +105,7 @@ var _ = Context("Inside the default namespace", func() { Containers: []corev1.Container{ { Name: "ray-head", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), Env: []corev1.EnvVar{ { Name: "MY_POD_IP", @@ -155,9 +159,9 @@ var _ = Context("Inside the default namespace", func() { }, WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ { - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(0), - MaxReplicas: pointer.Int32(10000), + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](0), + MaxReplicas: ptr.To[int32](10000), GroupName: "small-group", RayStartParams: map[string]string{ "port": "6379", @@ -175,7 +179,7 @@ var _ = Context("Inside the default namespace", func() { Containers: []corev1.Container{ { Name: "ray-worker", - Image: "rayproject/ray:2.9.0", + Image: support.GetRayImage(), Command: []string{"echo"}, Args: []string{"Hello Ray"}, Env: []corev1.EnvVar{ @@ -214,7 +218,7 @@ var _ = Context("Inside the default namespace", func() { myRayCluster := &rayv1.RayCluster{} - Describe("When creating a rayservice", func() { + Describe("When creating a rayservice", Ordered, func() { It("should create a rayservice object", func() { err := k8sClient.Create(ctx, myRayService) Expect(err).NotTo(HaveOccurred(), "failed to create test RayService resource") @@ -233,7 +237,7 @@ var _ = Context("Inside the default namespace", func() { pendingRayClusterName := 
myRayService.Status.PendingServiceStatus.RayClusterName // Update the status of the head Pod to Running. - updateHeadPodToRunningAndReady(ctx, pendingRayClusterName) + updateHeadPodToRunningAndReady(ctx, pendingRayClusterName, "default") // Make sure the pending RayCluster becomes the active RayCluster. Eventually( @@ -247,16 +251,15 @@ var _ = Context("Inside the default namespace", func() { }) It("should create more than 1 worker", func() { - filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: myRayService.Status.ActiveServiceStatus.RayClusterName, utils.RayNodeGroupLabelKey: "small-group"} Eventually( - listResourceFunc(ctx, &workerPods, filterLabels, &client.ListOptions{Namespace: "default"}), + listResourceFunc(ctx, &workerPods, common.RayClusterGroupPodsAssociationOptions(myRayCluster, "small-group").ToListOptions()...), time.Second*15, time.Millisecond*500).Should(Equal(3), fmt.Sprintf("workerGroup %v", workerPods.Items)) if len(workerPods.Items) > 0 { Expect(workerPods.Items[0].Status.Phase).Should(Or(Equal(corev1.PodRunning), Equal(corev1.PodPending))) // All the worker Pods should have a port with the name "dashboard-agent" for _, pod := range workerPods.Items { // Worker Pod should have only one container. 
- Expect(len(pod.Spec.Containers)).Should(Equal(1)) + Expect(pod.Spec.Containers).Should(HaveLen(1)) Expect(utils.EnvVarExists(utils.RAY_SERVE_KV_TIMEOUT_S, pod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeTrue()) } } @@ -271,7 +274,7 @@ var _ = Context("Inside the default namespace", func() { It("should create a new head service resource", func() { svc := &corev1.Service{} headSvcName, err := utils.GenerateHeadServiceName(utils.RayServiceCRD, myRayService.Spec.RayClusterSpec, myRayService.Name) - Expect(err).To(BeNil(), "failed to generate head service name") + Expect(err).ToNot(HaveOccurred(), "failed to generate head service name") Eventually( getResourceFunc(ctx, client.ObjectKey{Name: headSvcName, Namespace: "default"}, svc), time.Second*15, time.Millisecond*500).Should(BeNil(), "My head service = %v", svc) @@ -304,7 +307,7 @@ var _ = Context("Inside the default namespace", func() { pendingRayClusterName := myRayService.Status.PendingServiceStatus.RayClusterName // Update the status of the head Pod to Running. - updateHeadPodToRunningAndReady(ctx, pendingRayClusterName) + updateHeadPodToRunningAndReady(ctx, pendingRayClusterName, "default") // Confirm switch to a new Ray Cluster. 
Eventually( @@ -380,7 +383,7 @@ var _ = Context("Inside the default namespace", func() { getResourceFunc(ctx, client.ObjectKey{Name: myRayService.Status.ActiveServiceStatus.RayClusterName, Namespace: "default"}, myRayCluster), time.Second*3, time.Millisecond*500).Should(BeNil(), "My myRayCluster = %v", myRayCluster.Name) - cleanUpWorkersToDelete(ctx, myRayCluster, 0) + cleanUpWorkersToDelete(ctx, myRayCluster) }) It("Autoscaler updates the pending RayCluster and should not switch to a new RayCluster", func() { @@ -424,7 +427,7 @@ var _ = Context("Inside the default namespace", func() { // The pending RayCluster will become the active RayCluster after: // (1) The pending RayCluster's head Pod becomes Running and Ready // (2) The pending RayCluster's Serve Deployments are HEALTHY. - updateHeadPodToRunningAndReady(ctx, initialPendingClusterName) + updateHeadPodToRunningAndReady(ctx, initialPendingClusterName, "default") healthyStatus := generateServeStatus(rayv1.DeploymentStatusEnum.HEALTHY, rayv1.ApplicationStatusEnum.RUNNING) fakeRayDashboardClient.SetMultiApplicationStatuses(map[string]*utils.ServeApplicationStatus{testServeAppName: &healthyStatus}) Eventually( @@ -434,7 +437,7 @@ var _ = Context("Inside the default namespace", func() { getRayClusterNameFunc(ctx, myRayService), time.Second*15, time.Millisecond*500).Should(Equal(initialPendingClusterName), "New active RayCluster name = %v", myRayService.Status.ActiveServiceStatus.RayClusterName) - cleanUpWorkersToDelete(ctx, myRayCluster, 0) + cleanUpWorkersToDelete(ctx, myRayCluster) }) It("should update the active RayCluster in place when WorkerGroupSpecs are modified by the user in RayServiceSpec", func() { initialClusterName, _ := getRayClusterNameFunc(ctx, myRayService)() @@ -521,7 +524,7 @@ var _ = Context("Inside the default namespace", func() { // The pending RayCluster will become the active RayCluster after: // (1) The pending RayCluster's head Pod becomes Running and Ready // (2) The pending RayCluster's 
Serve Deployments are HEALTHY. - updateHeadPodToRunningAndReady(ctx, initialPendingClusterName) + updateHeadPodToRunningAndReady(ctx, initialPendingClusterName, "default") healthyStatus := generateServeStatus(rayv1.DeploymentStatusEnum.HEALTHY, rayv1.ApplicationStatusEnum.RUNNING) fakeRayDashboardClient.SetMultiApplicationStatuses(map[string]*utils.ServeApplicationStatus{testServeAppName: &healthyStatus}) Eventually( @@ -569,8 +572,7 @@ var _ = Context("Inside the default namespace", func() { // Note: LastUpdateTime/HealthLastUpdateTime will be overwritten via metav1.Now() in rayservice_controller.go. // Hence, we cannot use `newTime` to check whether the status is updated or not. Eventually( - checkAllDeploymentStatusesUnhealthy(ctx, myRayService), - time.Second*3, time.Millisecond*500).Should(BeTrue(), "myRayService status = %v", myRayService.Status) + checkAllDeploymentStatusesUnhealthy).WithContext(ctx).WithArguments(myRayService).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(BeTrue(), "myRayService status = %v", myRayService.Status) healthyStatus := generateServeStatus(rayv1.DeploymentStatusEnum.HEALTHY, rayv1.ApplicationStatusEnum.RUNNING) fakeRayDashboardClient.SetMultiApplicationStatuses(map[string]*utils.ServeApplicationStatus{testServeAppName: &healthyStatus}) @@ -667,7 +669,7 @@ var _ = Context("Inside the default namespace", func() { // The cluster should switch once the deployments are finished updating healthyStatus := generateServeStatus(rayv1.DeploymentStatusEnum.HEALTHY, rayv1.ApplicationStatusEnum.RUNNING) fakeRayDashboardClient.SetMultiApplicationStatuses(map[string]*utils.ServeApplicationStatus{testServeAppName: &healthyStatus}) - updateHeadPodToRunningAndReady(ctx, pendingRayClusterName) + updateHeadPodToRunningAndReady(ctx, pendingRayClusterName, "default") Eventually( getRayClusterNameFunc(ctx, myRayService), diff --git a/ray-operator/controllers/ray/rayservice_controller_unit_test.go 
b/ray-operator/controllers/ray/rayservice_controller_unit_test.go index cd9735adb51..da6fac35577 100644 --- a/ray-operator/controllers/ray/rayservice_controller_unit_test.go +++ b/ray-operator/controllers/ray/rayservice_controller_unit_test.go @@ -9,35 +9,70 @@ import ( "testing" "time" - cmap "github.com/orcaman/concurrent-map/v2" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" - "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" - "k8s.io/utils/pointer" + "k8s.io/utils/lru" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme" + "github.com/ray-project/kuberay/ray-operator/test/support" ) +func TestValidateRayServiceSpec(t *testing.T) { + err := validateRayServiceSpec(&rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayv1.RayClusterSpec{ + HeadGroupSpec: rayv1.HeadGroupSpec{ + HeadService: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-head-service", + }, + }, + }, + }, + }, + }) + assert.Error(t, err, "spec.rayClusterConfig.headGroupSpec.headService.metadata.name should not be set") + + err = validateRayServiceSpec(&rayv1.RayService{ + Spec: rayv1.RayServiceSpec{}, + }) + assert.NoError(t, err, "The RayService spec is valid.") + + var upgradeStrat rayv1.RayServiceUpgradeType = "invalidStrategy" + err = validateRayServiceSpec(&rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + 
UpgradeStrategy: &rayv1.RayServiceUpgradeStrategy{ + Type: &upgradeStrat, + }, + }, + }) + assert.Error(t, err, "spec.UpgradeSpec.Type is invalid") +} + func TestGenerateHashWithoutReplicasAndWorkersToDelete(t *testing.T) { // `generateRayClusterJsonHash` will mute fields that will not trigger new RayCluster preparation. For example, // Autoscaler will update `Replicas` and `WorkersToDelete` when scaling up/down. Hence, `hash1` should be equal to // `hash2` in this case. cluster := rayv1.RayCluster{ Spec: rayv1.RayClusterSpec{ - RayVersion: "2.9.0", + RayVersion: support.GetRayVersion(), WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ { Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{}, }, - Replicas: pointer.Int32(2), - MinReplicas: pointer.Int32(1), - MaxReplicas: pointer.Int32(4), + Replicas: ptr.To[int32](2), + MinReplicas: ptr.To[int32](1), + MaxReplicas: ptr.To[int32](4), }, }, }, @@ -58,106 +93,243 @@ func TestGenerateHashWithoutReplicasAndWorkersToDelete(t *testing.T) { assert.NotEqual(t, hash1, hash3) } -func TestGetClusterAction(t *testing.T) { - clusterSpec1 := rayv1.RayClusterSpec{ - RayVersion: "2.9.0", - WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ - { - Replicas: pointer.Int32(2), - MinReplicas: pointer.Int32(1), - MaxReplicas: pointer.Int32(4), +func TestDecideClusterAction(t *testing.T) { + ctx := context.TODO() + + fillAnnotations := func(rayCluster *rayv1.RayCluster) { + hash, _ := generateHashWithoutReplicasAndWorkersToDelete(rayCluster.Spec) + rayCluster.ObjectMeta.Annotations[utils.HashWithoutReplicasAndWorkersToDeleteKey] = hash + rayCluster.ObjectMeta.Annotations[utils.NumWorkerGroupsKey] = strconv.Itoa(len(rayCluster.Spec.WorkerGroupSpecs)) + } + + rayServiceStatusWithPendingCluster := rayv1.RayServiceStatuses{ + PendingServiceStatus: rayv1.RayServiceStatus{ + RayClusterName: "new-cluster", + }, + } + + rayClusterBase := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + utils.KubeRayVersion: 
utils.KUBERAY_VERSION, + }, + }, + Spec: rayv1.RayClusterSpec{ + RayVersion: "1.0.0", + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ + { + Replicas: ptr.To[int32](2), + MinReplicas: ptr.To[int32](1), + MaxReplicas: ptr.To[int32](4), + GroupName: "worker-group-1", + ScaleStrategy: rayv1.ScaleStrategy{ + WorkersToDelete: []string{"worker-1", "worker-2"}, + }, + }, }, }, } - clusterSpec2 := clusterSpec1.DeepCopy() - clusterSpec2.RayVersion = "2.100.0" + fillAnnotations(rayClusterBase) + + rayClusterDifferentRayVersion := rayClusterBase.DeepCopy() + rayClusterDifferentRayVersion.Spec.RayVersion = "2.0.0" + fillAnnotations(rayClusterDifferentRayVersion) + + rayClusterDifferentReplicasAndWorkersToDelete := rayClusterBase.DeepCopy() + rayClusterDifferentReplicasAndWorkersToDelete.Spec.WorkerGroupSpecs[0].Replicas = ptr.To[int32](3) + rayClusterDifferentReplicasAndWorkersToDelete.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{"worker-3", "worker-4"} + fillAnnotations(rayClusterDifferentReplicasAndWorkersToDelete) + + rayClusterDifferentWorkerGroup := rayClusterBase.DeepCopy() + rayClusterDifferentWorkerGroup.Spec.WorkerGroupSpecs[0].GroupName = "worker-group-2" + fillAnnotations(rayClusterDifferentWorkerGroup) + + rayClusterAdditionalWorkerGroup := rayClusterBase.DeepCopy() + rayClusterAdditionalWorkerGroup.Spec.WorkerGroupSpecs = append(rayClusterAdditionalWorkerGroup.Spec.WorkerGroupSpecs, rayv1.WorkerGroupSpec{ + Replicas: ptr.To[int32](3), + MinReplicas: ptr.To[int32](2), + MaxReplicas: ptr.To[int32](5), + GroupName: "worker-group-2", + }) + fillAnnotations(rayClusterAdditionalWorkerGroup) - // Test Case 1: Different RayVersions should lead to RolloutNew. 
- action, err := getClusterAction(clusterSpec1, *clusterSpec2) - assert.Nil(t, err) - assert.Equal(t, RolloutNew, action) + rayClusterWorkerGroupRemoved := rayClusterBase.DeepCopy() + rayClusterWorkerGroupRemoved.Spec.WorkerGroupSpecs = []rayv1.WorkerGroupSpec{} + fillAnnotations(rayClusterWorkerGroupRemoved) - // Test Case 2: Same spec should lead to DoNothing. - action, err = getClusterAction(clusterSpec1, clusterSpec1) - assert.Nil(t, err) - assert.Equal(t, DoNothing, action) + rayClusterDifferentKubeRayVersion := rayClusterBase.DeepCopy() + rayClusterDifferentKubeRayVersion.ObjectMeta.Annotations[utils.KubeRayVersion] = "some-other-version" - // Test Case 3: Different WorkerGroupSpecs should lead to RolloutNew. - clusterSpec3 := clusterSpec1.DeepCopy() - clusterSpec3.WorkerGroupSpecs[0].MinReplicas = pointer.Int32(5) - action, err = getClusterAction(clusterSpec1, *clusterSpec3) - assert.Nil(t, err) - assert.Equal(t, RolloutNew, action) - - // Test Case 4: Adding a new WorkerGroupSpec should lead to Update. - clusterSpec4 := clusterSpec1.DeepCopy() - clusterSpec4.WorkerGroupSpecs = append(clusterSpec4.WorkerGroupSpecs, rayv1.WorkerGroupSpec{ - Replicas: pointer.Int32(2), - MinReplicas: pointer.Int32(1), - MaxReplicas: pointer.Int32(4), - }) - action, err = getClusterAction(clusterSpec1, *clusterSpec4) - assert.Nil(t, err) - assert.Equal(t, Update, action) - - // Test Case 5: Removing a WorkerGroupSpec should lead to RolloutNew. 
- clusterSpec5 := clusterSpec1.DeepCopy() - clusterSpec5.WorkerGroupSpecs = []rayv1.WorkerGroupSpec{} - action, err = getClusterAction(clusterSpec1, *clusterSpec5) - assert.Nil(t, err) - assert.Equal(t, RolloutNew, action) + tests := []struct { + rayService *rayv1.RayService + activeRayCluster *rayv1.RayCluster + pendingRayCluster *rayv1.RayCluster + name string + expectedAction ClusterAction + }{ + { + name: "Has pending cluster name and cluster spec is the same", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterBase.Spec, + }, + Status: rayServiceStatusWithPendingCluster, + }, + activeRayCluster: nil, + pendingRayCluster: rayClusterBase, + expectedAction: DoNothing, + }, + { + name: "Has pending cluster name and cluster spec has different Ray version", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterDifferentRayVersion.Spec, + }, + Status: rayServiceStatusWithPendingCluster, + }, + activeRayCluster: nil, + pendingRayCluster: rayClusterBase, + expectedAction: CreatePendingCluster, + }, + { + name: "Has pending cluster name and cluster spec has different replicas and workers to delete", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterDifferentReplicasAndWorkersToDelete.Spec, + }, + Status: rayServiceStatusWithPendingCluster, + }, + activeRayCluster: nil, + pendingRayCluster: rayClusterBase, + expectedAction: DoNothing, + }, + { + name: "Has pending cluster name and cluster spec has different worker group name", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterDifferentWorkerGroup.Spec, + }, + Status: rayServiceStatusWithPendingCluster, + }, + activeRayCluster: nil, + pendingRayCluster: rayClusterBase, + expectedAction: CreatePendingCluster, + }, + { + name: "Has pending cluster name and cluster spec has additional worker group", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + 
RayClusterSpec: rayClusterAdditionalWorkerGroup.Spec, + }, + Status: rayServiceStatusWithPendingCluster, + }, + activeRayCluster: nil, + pendingRayCluster: rayClusterBase, + expectedAction: UpdatePendingCluster, + }, + { + name: "Has pending cluster name and cluster spec has no worker group", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterWorkerGroupRemoved.Spec, + }, + Status: rayServiceStatusWithPendingCluster, + }, + activeRayCluster: nil, + pendingRayCluster: rayClusterBase, + expectedAction: CreatePendingCluster, + }, + { + name: "No pending cluster name and no active cluster", + rayService: &rayv1.RayService{}, + activeRayCluster: nil, + pendingRayCluster: nil, + expectedAction: GeneratePendingClusterName, + }, + { + name: "No pending cluster name and active cluster has different KubeRay version", + rayService: &rayv1.RayService{}, + activeRayCluster: rayClusterDifferentKubeRayVersion, + pendingRayCluster: nil, + expectedAction: UpdateActiveCluster, + }, + { + name: "No pending cluster name and cluster spec is the same", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterBase.Spec, + }, + }, + activeRayCluster: rayClusterBase, + pendingRayCluster: nil, + expectedAction: DoNothing, + }, + { + name: "No pending cluster name and cluster spec has different Ray version", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterDifferentRayVersion.Spec, + }, + }, + activeRayCluster: rayClusterBase, + pendingRayCluster: nil, + expectedAction: GeneratePendingClusterName, + }, + { + name: "No pending cluster name and cluster spec has different replicas and workers to delete", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterDifferentReplicasAndWorkersToDelete.Spec, + }, + }, + activeRayCluster: rayClusterBase, + pendingRayCluster: nil, + expectedAction: DoNothing, + }, + { + name: "No pending cluster name and 
cluster spec has different worker group name", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterDifferentWorkerGroup.Spec, + }, + }, + activeRayCluster: rayClusterBase, + pendingRayCluster: nil, + expectedAction: GeneratePendingClusterName, + }, + { + name: "No pending cluster name and cluster spec has additional worker group", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterAdditionalWorkerGroup.Spec, + }, + }, + activeRayCluster: rayClusterBase, + pendingRayCluster: nil, + expectedAction: UpdateActiveCluster, + }, + { + name: "No pending cluster name and cluster spec has no worker group", + rayService: &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayClusterWorkerGroupRemoved.Spec, + }, + }, + activeRayCluster: rayClusterBase, + pendingRayCluster: nil, + expectedAction: GeneratePendingClusterName, + }, + } - // Test Case 6: Only changing the number of replicas should lead to DoNothing. - clusterSpec6 := clusterSpec1.DeepCopy() - clusterSpec6.WorkerGroupSpecs[0].Replicas = pointer.Int32(3) - action, err = getClusterAction(clusterSpec1, *clusterSpec6) - assert.Nil(t, err) - assert.Equal(t, DoNothing, action) - - // Test Case 7: Only changing the number of replicas in an existing WorkerGroupSpec *and* adding a new WorkerGroupSpec should lead to Update. - clusterSpec7 := clusterSpec1.DeepCopy() - clusterSpec7.WorkerGroupSpecs[0].Replicas = pointer.Int32(3) - clusterSpec7.WorkerGroupSpecs = append(clusterSpec7.WorkerGroupSpecs, rayv1.WorkerGroupSpec{ - Replicas: pointer.Int32(2), - MinReplicas: pointer.Int32(1), - MaxReplicas: pointer.Int32(4), - }) - action, err = getClusterAction(clusterSpec1, *clusterSpec7) - assert.Nil(t, err) - assert.Equal(t, Update, action) - - // Test Case 8: Adding two new WorkerGroupSpecs should lead to Update. 
- clusterSpec8 := clusterSpec1.DeepCopy() - clusterSpec8.WorkerGroupSpecs = append(clusterSpec8.WorkerGroupSpecs, rayv1.WorkerGroupSpec{ - Replicas: pointer.Int32(2), - MinReplicas: pointer.Int32(1), - MaxReplicas: pointer.Int32(4), - }) - clusterSpec8.WorkerGroupSpecs = append(clusterSpec8.WorkerGroupSpecs, rayv1.WorkerGroupSpec{ - Replicas: pointer.Int32(3), - MinReplicas: pointer.Int32(2), - MaxReplicas: pointer.Int32(5), - }) - action, err = getClusterAction(clusterSpec1, *clusterSpec8) - assert.Nil(t, err) - assert.Equal(t, Update, action) - - // Test Case 9: Changing an existing WorkerGroupSpec outside of Replicas/WorkersToDelete *and* adding a new WorkerGroupSpec should lead to RolloutNew. - clusterSpec9 := clusterSpec1.DeepCopy() - clusterSpec9.WorkerGroupSpecs[0].MaxReplicas = pointer.Int32(5) - clusterSpec9.WorkerGroupSpecs = append(clusterSpec9.WorkerGroupSpecs, rayv1.WorkerGroupSpec{ - Replicas: pointer.Int32(2), - MinReplicas: pointer.Int32(1), - MaxReplicas: pointer.Int32(4), - }) - action, err = getClusterAction(clusterSpec1, *clusterSpec9) - assert.Nil(t, err) - assert.Equal(t, RolloutNew, action) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + action := decideClusterAction(ctx, tt.rayService, tt.activeRayCluster, tt.pendingRayCluster) + assert.Equal(t, tt.expectedAction, action) + }) + } } func TestInconsistentRayServiceStatuses(t *testing.T) { - r := &RayServiceReconciler{} - timeNow := metav1.Now() oldStatus := rayv1.RayServiceStatuses{ ActiveServiceStatus: rayv1.RayServiceStatus{ @@ -194,18 +366,18 @@ func TestInconsistentRayServiceStatuses(t *testing.T) { }, }, }, - ServiceStatus: rayv1.Restarting, + ServiceStatus: rayv1.PreparingNewCluster, } ctx := context.Background() // Test 1: Update ServiceStatus only. 
newStatus := oldStatus.DeepCopy() newStatus.ServiceStatus = rayv1.WaitForServeDeploymentReady - assert.True(t, r.inconsistentRayServiceStatuses(ctx, oldStatus, *newStatus)) + assert.True(t, inconsistentRayServiceStatuses(ctx, oldStatus, *newStatus)) // Test 2: Test RayServiceStatus newStatus = oldStatus.DeepCopy() - assert.False(t, r.inconsistentRayServiceStatuses(ctx, oldStatus, *newStatus)) + assert.False(t, inconsistentRayServiceStatuses(ctx, oldStatus, *newStatus)) } func TestInconsistentRayServiceStatus(t *testing.T) { @@ -240,7 +412,6 @@ func TestInconsistentRayServiceStatus(t *testing.T) { }, } - r := &RayServiceReconciler{} ctx := context.Background() // Test 1: Only HealthLastUpdateTime is updated. @@ -249,7 +420,7 @@ func TestInconsistentRayServiceStatus(t *testing.T) { application.HealthLastUpdateTime = &metav1.Time{Time: timeNow.Add(1)} newStatus.Applications[appName] = application } - assert.False(t, r.inconsistentRayServiceStatus(ctx, oldStatus, *newStatus)) + assert.False(t, inconsistentRayServiceStatus(ctx, oldStatus, *newStatus)) } func TestIsHeadPodRunningAndReady(t *testing.T) { @@ -474,17 +645,8 @@ func TestGetAndCheckServeStatus(t *testing.T) { _ = rayv1.AddToScheme(newScheme) _ = corev1.AddToScheme(newScheme) - // Initialize a fake client with newScheme and runtimeObjects. - runtimeObjects := []runtime.Object{} - fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() - // Initialize RayService reconciler. 
ctx := context.TODO() - r := RayServiceReconciler{ - Client: fakeClient, - Recorder: &record.FakeRecorder{}, - Scheme: scheme.Scheme, - } serveAppName := "serve-app-1" longPeriod := time.Duration(10000) shortPeriod := time.Duration(1) @@ -613,7 +775,7 @@ func TestGetAndCheckServeStatus(t *testing.T) { dashboardClient = &utils.FakeRayDashboardClient{} } prevRayServiceStatus := rayv1.RayServiceStatus{Applications: tc.applications} - isReady, err := r.getAndCheckServeStatus(ctx, dashboardClient, &prevRayServiceStatus) + isReady, err := getAndCheckServeStatus(ctx, dashboardClient, &prevRayServiceStatus) assert.Nil(t, err) assert.Equal(t, tc.expectedReady, isReady) }) @@ -635,7 +797,7 @@ func TestCheckIfNeedSubmitServeDeployment(t *testing.T) { Client: fakeClient, Recorder: &record.FakeRecorder{}, Scheme: scheme.Scheme, - ServeConfigs: cmap.New[string](), + ServeConfigs: lru.New(utils.ServeConfigLRUSize), } namespace := "ray" @@ -671,22 +833,21 @@ applications: // Test 1: The RayCluster is new, and this is the first reconciliation after the RayCluster becomes ready. // No Serve application has been created yet, so the RayService's serve configuration has not been cached in // `r.ServeConfigs`. - cacheKey := r.generateConfigKey(&rayService, cluster.Name) - _, exist := r.ServeConfigs.Get(cacheKey) - assert.False(t, exist) + serveConfig := r.getServeConfigFromCache(&rayService, cluster.Name) + assert.Empty(t, serveConfig) shouldCreate := r.checkIfNeedSubmitServeDeployment(ctx, &rayService, &cluster, &rayv1.RayServiceStatus{}) assert.True(t, shouldCreate) // Test 2: The RayCluster is not new, but the head Pod without GCS FT-enabled crashes and restarts. // Hence, the RayService's Serve application status is empty, but the KubeRay operator has cached the Serve // application's configuration. - r.ServeConfigs.Set(cacheKey, rayService.Spec.ServeConfigV2) // Simulate the Serve application's configuration has been cached. 
+ r.cacheServeConfig(&rayService, cluster.Name) // Simulate the Serve application's configuration has been cached. shouldCreate = r.checkIfNeedSubmitServeDeployment(ctx, &rayService, &cluster, &rayv1.RayServiceStatus{}) assert.True(t, shouldCreate) // Test 3: The Serve application has been created, and the RayService's status has been updated. - _, exist = r.ServeConfigs.Get(cacheKey) - assert.True(t, exist) + serveConfig = r.getServeConfigFromCache(&rayService, cluster.Name) + assert.NotEmpty(t, serveConfig) serveStatus := rayv1.RayServiceStatus{ Applications: map[string]rayv1.AppStatus{ "myapp": { @@ -732,15 +893,19 @@ func TestReconcileRayCluster(t *testing.T) { Annotations: map[string]string{ utils.HashWithoutReplicasAndWorkersToDeleteKey: hash, utils.NumWorkerGroupsKey: strconv.Itoa(len(rayService.Spec.RayClusterSpec.WorkerGroupSpecs)), + utils.KubeRayVersion: utils.KUBERAY_VERSION, }, }, } tests := map[string]struct { activeCluster *rayv1.RayCluster + rayServiceUpgradeType rayv1.RayServiceUpgradeType + kubeRayVersion string updateRayClusterSpec bool enableZeroDowntime bool shouldPrepareNewCluster bool + updateKubeRayVersion bool }{ // Test 1: Neither active nor pending clusters exist. The `markRestart` function will be called, so the `PendingServiceStatus.RayClusterName` should be set. "Zero-downtime upgrade is enabled. Neither active nor pending clusters exist.": { @@ -763,8 +928,8 @@ func TestReconcileRayCluster(t *testing.T) { enableZeroDowntime: true, shouldPrepareNewCluster: true, }, - // Test 4: The active cluster exists. Trigger the zero-downtime upgrade. - "Zero-downtime upgrade is disabled. The active cluster exists. Trigger the zero-downtime upgrade.": { + // Test 4: The active cluster exists. Zero-downtime upgrade is false, should not trigger zero-downtime upgrade. + "Zero-downtime upgrade is disabled. The active cluster exists. 
Does not trigger the zero-downtime upgrade.": { activeCluster: activeCluster.DeepCopy(), updateRayClusterSpec: true, enableZeroDowntime: false, @@ -777,6 +942,56 @@ func TestReconcileRayCluster(t *testing.T) { enableZeroDowntime: false, shouldPrepareNewCluster: true, }, + // Test 6: If the active KubeRay version doesn't match the KubeRay version annotation on the RayCluster, update the RayCluster's hash and KubeRay version + // annotations first before checking whether to trigger a zero downtime upgrade. This behavior occurs because when we upgrade the KubeRay CRD, the hash + // generated by different KubeRay versions may differ, which can accidentally trigger a zero downtime upgrade. + "Active RayCluster exists. KubeRay version is mismatched. Update the RayCluster.": { + activeCluster: activeCluster.DeepCopy(), + updateRayClusterSpec: true, + enableZeroDowntime: true, + shouldPrepareNewCluster: false, + updateKubeRayVersion: true, + kubeRayVersion: "new-version", + }, + // Test 7: Zero downtime upgrade is enabled, but is enabled through the RayServiceSpec + "Zero-downtime upgrade enabled. The active cluster exist. Zero-downtime upgrade is triggered through RayServiceSpec.": { + activeCluster: activeCluster.DeepCopy(), + updateRayClusterSpec: true, + enableZeroDowntime: true, + shouldPrepareNewCluster: true, + rayServiceUpgradeType: rayv1.NewCluster, + }, + // Test 8: Zero downtime upgrade is enabled. Env var is set to false but RayServiceSpec is set to NewCluster. Trigger the zero-downtime upgrade. + "Zero-downtime upgrade is enabled through RayServiceSpec and not through env var. Active cluster exist. Trigger the zero-downtime upgrade.": { + activeCluster: activeCluster.DeepCopy(), + updateRayClusterSpec: true, + enableZeroDowntime: false, + shouldPrepareNewCluster: true, + rayServiceUpgradeType: rayv1.NewCluster, + }, + // Test 9: Zero downtime upgrade is disabled. Env var is set to true but RayServiceSpec is set to None. + "Zero-downtime upgrade is disabled. 
Env var is set to true but RayServiceSpec is set to None.": { + activeCluster: activeCluster.DeepCopy(), + updateRayClusterSpec: true, + enableZeroDowntime: true, + shouldPrepareNewCluster: false, + rayServiceUpgradeType: rayv1.None, + }, + // Test 10: Zero downtime upgrade is enabled. Neither the env var nor the RayServiceSpec is set. Trigger the zero-downtime upgrade. + "Zero-downtime upgrade is enabled. Neither the env var nor the RayServiceSpec is set.": { + activeCluster: nil, + updateRayClusterSpec: true, + shouldPrepareNewCluster: true, + rayServiceUpgradeType: "", + }, + // Test 11: Zero downtime upgrade is disabled. Both the env var and the RayServiceSpec is set to disable zero-downtime upgrade. + "Zero-downtime upgrade is disabled by both env var and RayServiceSpec.": { + activeCluster: activeCluster.DeepCopy(), + updateRayClusterSpec: true, + enableZeroDowntime: false, + shouldPrepareNewCluster: false, + rayServiceUpgradeType: rayv1.None, + }, } for name, tc := range tests { @@ -788,13 +1003,23 @@ func TestReconcileRayCluster(t *testing.T) { } runtimeObjects := []runtime.Object{} if tc.activeCluster != nil { + // Update 'ray.io/kuberay-version' to a new version if kubeRayVersion is set. 
+ if tc.updateKubeRayVersion { + tc.activeCluster.Annotations[utils.KubeRayVersion] = tc.kubeRayVersion + } runtimeObjects = append(runtimeObjects, tc.activeCluster.DeepCopy()) } fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() r := RayServiceReconciler{ - Client: fakeClient, + Client: fakeClient, + Scheme: newScheme, + Recorder: record.NewFakeRecorder(1), } service := rayService.DeepCopy() + service.Spec.UpgradeStrategy = &rayv1.RayServiceUpgradeStrategy{} + if tc.rayServiceUpgradeType != "" { + service.Spec.UpgradeStrategy.Type = &tc.rayServiceUpgradeType + } if tc.updateRayClusterSpec { service.Spec.RayClusterSpec.RayVersion = "new-version" } @@ -802,9 +1027,14 @@ func TestReconcileRayCluster(t *testing.T) { service.Status.ActiveServiceStatus.RayClusterName = tc.activeCluster.Name } assert.Equal(t, "", service.Status.PendingServiceStatus.RayClusterName) - _, _, err = r.reconcileRayCluster(ctx, service) + activeRayCluster, _, err := r.reconcileRayCluster(ctx, service) assert.Nil(t, err) + // If the KubeRay version has changed, check that the RayCluster annotations have been updated to the correct version. + if tc.updateKubeRayVersion && activeRayCluster != nil { + assert.Equal(t, utils.KUBERAY_VERSION, activeRayCluster.Annotations[utils.KubeRayVersion]) + } + // If KubeRay operator is preparing a new cluster, the `PendingServiceStatus.RayClusterName` should be set by calling the function `markRestart`. 
if tc.shouldPrepareNewCluster { assert.NotEqual(t, "", service.Status.PendingServiceStatus.RayClusterName) @@ -821,3 +1051,92 @@ func initFakeDashboardClient(appName string, deploymentStatus string, appStatus fakeDashboardClient.SetMultiApplicationStatuses(map[string]*utils.ServeApplicationStatus{appName: &status}) return &fakeDashboardClient } + +func initFakeRayHttpProxyClient(isHealthy bool) utils.RayHttpProxyClientInterface { + return &utils.FakeRayHttpProxyClient{ + IsHealthy: isHealthy, + } +} + +func TestLabelHeadPodForServeStatus(t *testing.T) { + tests := map[string]struct { + expectServeResult string + excludeHeadPodFromServeSvc bool + isHealthy bool + }{ + "Ray serve application is running, excludeHeadPodFromServeSvc is true": { + "false", + true, + true, + }, + "Ray serve application is running, excludeHeadPodFromServeSvc is false": { + "true", + false, + true, + }, + "Ray serve application is unhealthy, excludeHeadPodFromServeSvc is true": { + "false", + true, + false, + }, + "Ray serve application is unhealthy, excludeHeadPodFromServeSvc is false": { + "false", + false, + false, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + newScheme := runtime.NewScheme() + _ = corev1.AddToScheme(newScheme) + + namespace := "mock-ray-namespace" + cluster := rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: namespace, + }, + } + headPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "head-pod", + Namespace: cluster.ObjectMeta.Namespace, + Labels: map[string]string{ + utils.RayClusterLabelKey: cluster.ObjectMeta.Name, + utils.RayNodeTypeLabelKey: string(rayv1.HeadNode), + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + }, + }, + }, + } + // Initialize a fake client with newScheme and runtimeObjects. 
+ runtimeObjects := []runtime.Object{headPod} + fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build() + ctx := context.TODO() + + fakeRayHttpProxyClient := initFakeRayHttpProxyClient(tc.isHealthy) + // Initialize RayService reconciler. + r := &RayServiceReconciler{ + Client: fakeClient, + Recorder: &record.FakeRecorder{}, + Scheme: newScheme, + httpProxyClientFunc: func() utils.RayHttpProxyClientInterface { + return fakeRayHttpProxyClient + }, + } + + err := r.updateHeadPodServeLabel(ctx, &cluster, tc.excludeHeadPodFromServeSvc) + assert.NoError(t, err) + // Get latest headPod status + headPod, err = common.GetRayClusterHeadPod(ctx, r, &cluster) + assert.Equal(t, headPod.Labels[utils.RayClusterServingServiceLabelKey], tc.expectServeResult) + assert.NoError(t, err) + }) + } +} diff --git a/ray-operator/controllers/ray/suite_helpers_test.go b/ray-operator/controllers/ray/suite_helpers_test.go index e3c31b22dad..66fb4885729 100644 --- a/ray-operator/controllers/ray/suite_helpers_test.go +++ b/ray-operator/controllers/ray/suite_helpers_test.go @@ -9,11 +9,12 @@ import ( "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" "github.com/onsi/gomega" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" corev1 "k8s.io/api/core/v1" "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" ) func getResourceFunc(ctx context.Context, key client.ObjectKey, obj client.Object) func() error { @@ -30,7 +31,7 @@ func listResourceFunc(ctx context.Context, workerPods *corev1.PodList, opt ...cl count := 0 for _, aPod := range workerPods.Items { - if (reflect.DeepEqual(aPod.Status.Phase, corev1.PodRunning) || reflect.DeepEqual(aPod.Status.Phase, corev1.PodPending)) && 
aPod.DeletionTimestamp == nil { + if (reflect.DeepEqual(aPod.Status.Phase, corev1.PodRunning) || reflect.DeepEqual(aPod.Status.Phase, corev1.PodPending)) && (aPod.DeletionTimestamp == nil || len(aPod.Finalizers) != 0) { count++ } } @@ -45,15 +46,21 @@ func getClusterState(ctx context.Context, namespace string, clusterName string) if err := k8sClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, &cluster); err != nil { log.Fatal(err) } - return cluster.Status.State + return cluster.Status.State //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } } -func isAllPodsRunning(ctx context.Context, podlist corev1.PodList, filterLabels client.MatchingLabels, namespace string) bool { - return isAllPodsRunningByFilters(ctx, podlist, filterLabels, &client.ListOptions{Namespace: namespace}) +func getClusterStatus(ctx context.Context, namespace string, clusterName string) func() rayv1.RayClusterStatus { + return func() rayv1.RayClusterStatus { + var cluster rayv1.RayCluster + if err := k8sClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, &cluster); err != nil { + log.Fatal(err) + } + return cluster.Status + } } -func isAllPodsRunningByFilters(ctx context.Context, podlist corev1.PodList, opt ...client.ListOption) bool { +func isAllPodsRunningByFilters(ctx context.Context, podlist corev1.PodList, opt []client.ListOption) bool { err := k8sClient.List(ctx, &podlist, opt...) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "failed to list Pods") for _, pod := range podlist.Items { @@ -64,14 +71,14 @@ func isAllPodsRunningByFilters(ctx context.Context, podlist corev1.PodList, opt return true } -func cleanUpWorkersToDelete(ctx context.Context, rayCluster *rayv1.RayCluster, workerGroupIndex int) { +func cleanUpWorkersToDelete(ctx context.Context, rayCluster *rayv1.RayCluster) { // Updating WorkersToDelete is the responsibility of the Ray Autoscaler. 
In this function, // we simulate the behavior of the Ray Autoscaler after the scaling process has finished. err := retry.RetryOnConflict(retry.DefaultRetry, func() error { gomega.Eventually( getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: "default"}, rayCluster), time.Second*9, time.Millisecond*500).Should(gomega.BeNil(), "raycluster = %v", rayCluster) - rayCluster.Spec.WorkerGroupSpecs[workerGroupIndex].ScaleStrategy.WorkersToDelete = []string{} + rayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{} return k8sClient.Update(ctx, rayCluster) }) gomega.Expect(err).NotTo(gomega.HaveOccurred(), "failed to clean up WorkersToDelete") @@ -79,13 +86,31 @@ func cleanUpWorkersToDelete(ctx context.Context, rayCluster *rayv1.RayCluster, w func getRayJobDeploymentStatus(ctx context.Context, rayJob *rayv1.RayJob) func() (rayv1.JobDeploymentStatus, error) { return func() (rayv1.JobDeploymentStatus, error) { - if err := k8sClient.Get(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: "default"}, rayJob); err != nil { + if err := k8sClient.Get(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: rayJob.Namespace}, rayJob); err != nil { return "", err } return rayJob.Status.JobDeploymentStatus, nil } } +func getRayJobSucceededStatus(ctx context.Context, rayJob *rayv1.RayJob) func() (int32, error) { + return func() (int32, error) { + if err := k8sClient.Get(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: rayJob.Namespace}, rayJob); err != nil { + return 0, err + } + return *rayJob.Status.Succeeded, nil + } +} + +func getRayJobFailedStatus(ctx context.Context, rayJob *rayv1.RayJob) func() (int32, error) { + return func() (int32, error) { + if err := k8sClient.Get(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: rayJob.Namespace}, rayJob); err != nil { + return 0, err + } + return *rayJob.Status.Failed, nil + } +} + func getRayClusterNameForRayJob(ctx context.Context, rayJob *rayv1.RayJob) func() (string, 
error) { if err := k8sClient.Get(ctx, client.ObjectKey{Name: rayJob.Name, Namespace: "default"}, rayJob); err != nil { @@ -207,10 +232,10 @@ func checkServiceHealth(ctx context.Context, rayService *rayv1.RayService) func( // There's no container runtime or any other K8s controllers. // So Pods are created, but no controller updates them from Pending to Running. // See https://book.kubebuilder.io/reference/envtest.html for more details. -func updateHeadPodToRunningAndReady(ctx context.Context, rayClusterName string) { +func updateHeadPodToRunningAndReady(ctx context.Context, rayClusterName string, namespace string) { var instance rayv1.RayCluster gomega.Eventually( - getResourceFunc(ctx, client.ObjectKey{Name: rayClusterName, Namespace: "default"}, &instance), + getResourceFunc(ctx, client.ObjectKey{Name: rayClusterName, Namespace: namespace}, &instance), time.Second*3, time.Millisecond*500).Should(gomega.BeNil(), "RayCluster %v not found", rayClusterName) headPods := corev1.PodList{} @@ -218,7 +243,7 @@ func updateHeadPodToRunningAndReady(ctx context.Context, rayClusterName string) gomega.Eventually( listResourceFunc(ctx, &headPods, headLabels...), - time.Second*15, time.Millisecond*500).Should(gomega.Equal(1), "Head pod list should have only 1 Pod = %v", headPods.Items) + time.Second*3, time.Millisecond*500).Should(gomega.Equal(1), "Head pod list should have only 1 Pod = %v", headPods.Items) headPod := headPods.Items[0] headPod.Status.Phase = corev1.PodRunning @@ -228,13 +253,64 @@ func updateHeadPodToRunningAndReady(ctx context.Context, rayClusterName string) Status: corev1.ConditionTrue, }, } - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - return k8sClient.Status().Update(ctx, &headPod) - }) + err := k8sClient.Status().Update(ctx, &headPod) gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to update head Pod status to PodRunning") // Make sure the head Pod is updated. 
gomega.Eventually( - isAllPodsRunningByFilters(ctx, headPods, headLabels...), - time.Second*15, time.Millisecond*500).Should(gomega.BeTrue(), "Head Pod should be running: %v", headPods.Items) + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(headPods, headLabels).WithTimeout(time.Second*15).WithPolling(time.Millisecond*500).Should(gomega.BeTrue(), "Head Pod should be running: %v", headPods.Items) +} + +// Update the status of the worker Pods to Running and Ready. Similar to updateHeadPodToRunningAndReady. +func updateWorkerPodsToRunningAndReady(ctx context.Context, rayClusterName string, namespace string) { + rayCluster := &rayv1.RayCluster{} + gomega.Eventually( + getResourceFunc(ctx, client.ObjectKey{Name: rayClusterName, Namespace: namespace}, rayCluster), + time.Second*3, time.Millisecond*500).Should(gomega.BeNil(), "RayCluster %v not found", rayClusterName) + + workerPods := corev1.PodList{} + workerLabels := common.RayClusterWorkerPodsAssociationOptions(rayCluster).ToListOptions() + numWorkerPods := int(*rayCluster.Spec.WorkerGroupSpecs[0].Replicas) + + gomega.Eventually( + listResourceFunc(ctx, &workerPods, workerLabels...), + time.Second*3, time.Millisecond*500).Should(gomega.Equal(numWorkerPods), "workerGroup: %v", workerPods.Items) + + for _, pod := range workerPods.Items { + pod.Status.Phase = corev1.PodRunning + pod.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + } + err := k8sClient.Status().Update(ctx, &pod) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to update worker Pod status to PodRunning") + } + + // Make sure all worker Pods are updated. 
+ gomega.Eventually( + isAllPodsRunningByFilters).WithContext(ctx).WithArguments(workerPods, workerLabels).WithTimeout(time.Second*3).WithPolling(time.Millisecond*500).Should(gomega.BeTrue(), "Worker Pods should be running: %v", workerPods.Items) +} + +func updateRayJobSuspendField(ctx context.Context, rayJob *rayv1.RayJob, suspend bool) error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := k8sClient.Get(ctx, client.ObjectKey{Namespace: rayJob.Namespace, Name: rayJob.Name}, rayJob) + if err != nil { + return err + } + rayJob.Spec.Suspend = suspend + return k8sClient.Update(ctx, rayJob) + }) +} + +func setJobIdOnRayJob(ctx context.Context, rayJob *rayv1.RayJob, jobId string) error { + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := k8sClient.Get(ctx, client.ObjectKey{Namespace: rayJob.Namespace, Name: rayJob.Name}, rayJob) + if err != nil { + return err + } + rayJob.Spec.JobId = jobId + return k8sClient.Update(ctx, rayJob) + }) } diff --git a/ray-operator/controllers/ray/suite_test.go b/ray-operator/controllers/ray/suite_test.go index 05b4d385f7f..de73a2e1e6e 100644 --- a/ray-operator/controllers/ray/suite_test.go +++ b/ray-operator/controllers/ray/suite_test.go @@ -20,6 +20,9 @@ import ( "path/filepath" "testing" + "sigs.k8s.io/controller-runtime/pkg/manager" + + configapi "github.com/ray-project/kuberay/ray-operator/apis/config/v1alpha1" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" @@ -51,6 +54,20 @@ var ( fakeRayHttpProxyClient *utils.FakeRayHttpProxyClient ) +type TestClientProvider struct{} + +func (testProvider TestClientProvider) GetDashboardClient(_ manager.Manager) func() utils.RayDashboardClientInterface { + return func() utils.RayDashboardClientInterface { + return fakeRayDashboardClient + } +} + +func (testProvider TestClientProvider) GetHttpProxyClient(_ manager.Manager) func() utils.RayHttpProxyClientInterface { + 
return func() utils.RayHttpProxyClientInterface { + return fakeRayHttpProxyClient + } +} + func TestAPIs(t *testing.T) { RegisterFailHandler(Fail) @@ -104,19 +121,15 @@ var _ = BeforeSuite(func(ctx SpecContext) { }, }, } - err = NewReconciler(ctx, mgr, options).SetupWithManager(mgr, 1) + configs := configapi.Configuration{} + err = NewReconciler(ctx, mgr, options, configs).SetupWithManager(mgr, 1) Expect(err).NotTo(HaveOccurred(), "failed to setup RayCluster controller") - err = NewRayServiceReconciler(ctx, mgr, func() utils.RayDashboardClientInterface { - return fakeRayDashboardClient - }, func() utils.RayHttpProxyClientInterface { - return fakeRayHttpProxyClient - }).SetupWithManager(mgr) + testClientProvider := TestClientProvider{} + err = NewRayServiceReconciler(ctx, mgr, testClientProvider).SetupWithManager(mgr, 1) Expect(err).NotTo(HaveOccurred(), "failed to setup RayService controller") - err = NewRayJobReconciler(ctx, mgr, func() utils.RayDashboardClientInterface { - return fakeRayDashboardClient - }).SetupWithManager(mgr) + err = NewRayJobReconciler(ctx, mgr, testClientProvider).SetupWithManager(mgr, 1) Expect(err).NotTo(HaveOccurred(), "failed to setup RayJob controller") go func() { diff --git a/ray-operator/controllers/ray/utils/constant.go b/ray-operator/controllers/ray/utils/constant.go index 9bad224addb..f108981d17b 100644 --- a/ray-operator/controllers/ray/utils/constant.go +++ b/ray-operator/controllers/ray/utils/constant.go @@ -1,5 +1,7 @@ package utils +import "errors" + const ( // Default application name @@ -23,14 +25,16 @@ const ( RayClusterHeadlessServiceLabelKey = "ray.io/headless-worker-svc" HashWithoutReplicasAndWorkersToDeleteKey = "ray.io/hash-without-replicas-and-workers-to-delete" NumWorkerGroupsKey = "ray.io/num-worker-groups" + KubeRayVersion = "ray.io/kuberay-version" // In KubeRay, the Ray container must be the first application container in a head or worker Pod. 
RayContainerIndex = 0 // Batch scheduling labels // TODO(tgaddair): consider making these part of the CRD - RaySchedulerName = "ray.io/scheduler-name" - RayPriorityClassName = "ray.io/priority-class-name" + RaySchedulerName = "ray.io/scheduler-name" + RayPriorityClassName = "ray.io/priority-class-name" + RayClusterGangSchedulingEnabled = "ray.io/gang-scheduling-enabled" // Ray GCS FT related annotations RayFTEnabledAnnotationKey = "ray.io/ft-enabled" @@ -59,18 +63,15 @@ const ( DashSymbol = "-" // Use as default port - DefaultClientPort = 10001 - // For Ray >= 1.11.0, "DefaultRedisPort" actually refers to the GCS server port. - // However, the role of this port is unchanged in Ray APIs like ray.init and ray start. - // This is the port used by Ray workers and drivers inside the Ray cluster to connect to the Ray head. - DefaultRedisPort = 6379 + DefaultClientPort = 10001 + DefaultGcsServerPort = 6379 DefaultDashboardPort = 8265 DefaultMetricsPort = 8080 DefaultDashboardAgentListenPort = 52365 DefaultServingPort = 8000 ClientPortName = "client" - RedisPortName = "redis" + GcsServerPortName = "gcs-server" DashboardPortName = "dashboard" MetricsPortName = "metrics" ServingPortName = "serve" @@ -94,7 +95,9 @@ const ( FQ_RAY_IP = "FQ_RAY_IP" RAY_PORT = "RAY_PORT" RAY_ADDRESS = "RAY_ADDRESS" + RAY_REDIS_ADDRESS = "RAY_REDIS_ADDRESS" REDIS_PASSWORD = "REDIS_PASSWORD" + REDIS_USERNAME = "REDIS_USERNAME" RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE = "RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE" RAY_EXTERNAL_STORAGE_NS = "RAY_external_storage_namespace" RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S = "RAY_gcs_rpc_server_reconnect_timeout_s" @@ -134,24 +137,34 @@ const ( // flag for v1.1.0 and will be removed if the behavior proves to be stable enough. ENABLE_PROBES_INJECTION = "ENABLE_PROBES_INJECTION" + // If set to true, kuberay creates a normal ClusterIP service for a Ray Head instead of a Headless service. 
+ ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE = "ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE" + + // If set to true, the RayJob CR itself will be deleted if shutdownAfterJobFinishes is set to true. Note that all resources created by the RayJob CR will be deleted, including the K8s Job. + DELETE_RAYJOB_CR_AFTER_JOB_FINISHES = "DELETE_RAYJOB_CR_AFTER_JOB_FINISHES" + // Ray core default configurations DefaultWorkerRayGcsReconnectTimeoutS = "600" LOCAL_HOST = "127.0.0.1" // Ray FT default readiness probe values DefaultReadinessProbeInitialDelaySeconds = 10 - DefaultReadinessProbeTimeoutSeconds = 1 - DefaultReadinessProbePeriodSeconds = 5 - DefaultReadinessProbeSuccessThreshold = 1 - DefaultReadinessProbeFailureThreshold = 10 - ServeReadinessProbeFailureThreshold = 1 + DefaultReadinessProbeTimeoutSeconds = 2 + // Probe timeout for Head pod needs to be longer as it queries two endpoints (api/local_raylet_healthz & api/gcs_healthz) + DefaultHeadReadinessProbeTimeoutSeconds = 5 + DefaultReadinessProbePeriodSeconds = 5 + DefaultReadinessProbeSuccessThreshold = 1 + DefaultReadinessProbeFailureThreshold = 10 + ServeReadinessProbeFailureThreshold = 1 // Ray FT default liveness probe values DefaultLivenessProbeInitialDelaySeconds = 30 - DefaultLivenessProbeTimeoutSeconds = 1 - DefaultLivenessProbePeriodSeconds = 5 - DefaultLivenessProbeSuccessThreshold = 1 - DefaultLivenessProbeFailureThreshold = 120 + DefaultLivenessProbeTimeoutSeconds = 2 + // Probe timeout for Head pod needs to be longer as it queries two endpoints (api/local_raylet_healthz & api/gcs_healthz) + DefaultHeadLivenessProbeTimeoutSeconds = 5 + DefaultLivenessProbePeriodSeconds = 5 + DefaultLivenessProbeSuccessThreshold = 1 + DefaultLivenessProbeFailureThreshold = 120 // Ray health check related configurations // Note: Since the Raylet process and the dashboard agent process are fate-sharing, @@ -161,7 +174,7 @@ const ( RayAgentRayletHealthPath = "api/local_raylet_healthz" RayDashboardGCSHealthPath = "api/gcs_healthz" 
RayServeProxyHealthPath = "-/healthz" - BaseWgetHealthCommand = "wget -T 2 -q -O- http://localhost:%d/%s | grep success" + BaseWgetHealthCommand = "wget -T %d -q -O- http://localhost:%d/%s | grep success" // Finalizers for RayJob RayJobStopJobFinalizer = "ray.io/rayjob-finalizer" @@ -174,6 +187,11 @@ const ( // as well as the user-agent. This constant is updated before release. // TODO: Update KUBERAY_VERSION to be a build-time variable. KUBERAY_VERSION = "nightly" + + // KubeRayController represents the value of the default job controller + KubeRayController = "ray.io/kuberay-operator" + + ServeConfigLRUSize = 1000 ) type ServiceType string @@ -188,3 +206,100 @@ const ( func RayOriginatedFromCRDLabelValue(crdType CRDType) string { return string(crdType) } + +type errRayClusterReplicaFailure struct { + reason string +} + +func (e *errRayClusterReplicaFailure) Error() string { + return e.reason +} + +// These are markers used by the calculateStatus() for setting the RayClusterReplicaFailure condition. +var ( + ErrFailedDeleteAllPods = &errRayClusterReplicaFailure{reason: "FailedDeleteAllPods"} + ErrFailedDeleteHeadPod = &errRayClusterReplicaFailure{reason: "FailedDeleteHeadPod"} + ErrFailedCreateHeadPod = &errRayClusterReplicaFailure{reason: "FailedCreateHeadPod"} + ErrFailedDeleteWorkerPod = &errRayClusterReplicaFailure{reason: "FailedDeleteWorkerPod"} + ErrFailedCreateWorkerPod = &errRayClusterReplicaFailure{reason: "FailedCreateWorkerPod"} +) + +func RayClusterReplicaFailureReason(err error) string { + var failure *errRayClusterReplicaFailure + if errors.As(err, &failure) { + return failure.reason + } + return "" +} + +// Currently, KubeRay fires events when failures occur during the creation or deletion of resources. 
+type K8sEventType string + +const ( + // RayCluster event list + InvalidRayClusterStatus K8sEventType = "InvalidRayClusterStatus" + InvalidRayClusterSpec K8sEventType = "InvalidRayClusterSpec" + // Head Pod event list + CreatedHeadPod K8sEventType = "CreatedHeadPod" + FailedToCreateHeadPod K8sEventType = "FailedToCreateHeadPod" + DeletedHeadPod K8sEventType = "DeletedHeadPod" + FailedToDeleteHeadPod K8sEventType = "FailedToDeleteHeadPod" + + // Worker Pod event list + CreatedWorkerPod K8sEventType = "CreatedWorkerPod" + FailedToCreateWorkerPod K8sEventType = "FailedToCreateWorkerPod" + DeletedWorkerPod K8sEventType = "DeletedWorkerPod" + FailedToDeleteWorkerPod K8sEventType = "FailedToDeleteWorkerPod" + FailedToDeleteWorkerPodCollection K8sEventType = "FailedToDeleteWorkerPodCollection" + + // Redis Cleanup Job event list + CreatedRedisCleanupJob K8sEventType = "CreatedRedisCleanupJob" + FailedToCreateRedisCleanupJob K8sEventType = "FailedToCreateRedisCleanupJob" + + // RayJob event list + InvalidRayJobSpec K8sEventType = "InvalidRayJobSpec" + InvalidRayJobStatus K8sEventType = "InvalidRayJobStatus" + CreatedRayJobSubmitter K8sEventType = "CreatedRayJobSubmitter" + DeletedRayJobSubmitter K8sEventType = "DeletedRayJobSubmitter" + FailedToCreateRayJobSubmitter K8sEventType = "FailedToCreateRayJobSubmitter" + FailedToDeleteRayJobSubmitter K8sEventType = "FailedToDeleteRayJobSubmitter" + CreatedRayCluster K8sEventType = "CreatedRayCluster" + UpdatedRayCluster K8sEventType = "UpdatedRayCluster" + DeletedRayCluster K8sEventType = "DeletedRayCluster" + FailedToCreateRayCluster K8sEventType = "FailedToCreateRayCluster" + FailedToDeleteRayCluster K8sEventType = "FailedToDeleteRayCluster" + FailedToUpdateRayCluster K8sEventType = "FailedToUpdateRayCluster" + + // RayService event list + InvalidRayServiceSpec K8sEventType = "InvalidRayServiceSpec" + + // Generic Pod event list + DeletedPod K8sEventType = "DeletedPod" + FailedToDeletePod K8sEventType = "FailedToDeletePod" + 
FailedToDeletePodCollection K8sEventType = "FailedToDeletePodCollection" + + // Ingress event list + CreatedIngress K8sEventType = "CreatedIngress" + FailedToCreateIngress K8sEventType = "FailedToCreateIngress" + + // Route event list + CreatedRoute K8sEventType = "CreatedRoute" + FailedToCreateRoute K8sEventType = "FailedToCreateRoute" + + // Service event list + CreatedService K8sEventType = "CreatedService" + FailedToCreateService K8sEventType = "FailedToCreateService" + + // ServiceAccount event list + CreatedServiceAccount K8sEventType = "CreatedServiceAccount" + FailedToCreateServiceAccount K8sEventType = "FailedToCreateServiceAccount" + AutoscalerServiceAccountNotFound K8sEventType = "AutoscalerServiceAccountNotFound" + + // Role event list + CreatedRole K8sEventType = "CreatedRole" + FailedToCreateRole K8sEventType = "FailedToCreateRole" + + // RoleBinding list + CreatedRoleBinding K8sEventType = "CreatedRoleBinding" + FailedToCreateRoleBinding K8sEventType = "FailedToCreateRoleBinding" +) diff --git a/ray-operator/controllers/ray/utils/dashboard_httpclient.go b/ray-operator/controllers/ray/utils/dashboard_httpclient.go index e864e4fa6ba..aa39dd99c22 100644 --- a/ray-operator/controllers/ray/utils/dashboard_httpclient.go +++ b/ray-operator/controllers/ray/utils/dashboard_httpclient.go @@ -31,7 +31,7 @@ var ( ) type RayDashboardClientInterface interface { - InitClient(url string) + InitClient(ctx context.Context, url string, rayCluster *rayv1.RayCluster) error UpdateDeployments(ctx context.Context, configJson []byte) error // V2/multi-app Rest API GetServeDetails(ctx context.Context) (*ServeDetails, error) @@ -46,16 +46,23 @@ type RayDashboardClientInterface interface { } type BaseDashboardClient struct { - client http.Client + client *http.Client dashboardURL string } -func GetRayDashboardClient() RayDashboardClientInterface { - return &RayDashboardClient{} +func GetRayDashboardClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() 
RayDashboardClientInterface { + return func() RayDashboardClientInterface { + return &RayDashboardClient{ + mgr: mgr, + useKubernetesProxy: useKubernetesProxy, + } + } } type RayDashboardClient struct { + mgr ctrl.Manager BaseDashboardClient + useKubernetesProxy bool } // FetchHeadServiceURL fetches the URL that consists of the FQDN for the RayCluster's head service @@ -97,15 +104,35 @@ func FetchHeadServiceURL(ctx context.Context, cli client.Client, rayCluster *ray headSvc.Namespace, domainName, port) - log.Info("FetchHeadServiceURL", "head service URL", headServiceURL, "port", defaultPortName) + log.Info("FetchHeadServiceURL", "head service URL", headServiceURL) return headServiceURL, nil } -func (r *RayDashboardClient) InitClient(url string) { - r.client = http.Client{ +func (r *RayDashboardClient) InitClient(ctx context.Context, url string, rayCluster *rayv1.RayCluster) error { + log := ctrl.LoggerFrom(ctx) + + if r.useKubernetesProxy { + var err error + headSvcName := rayCluster.Status.Head.ServiceName + if headSvcName == "" { + log.Info("RayCluster is missing .status.head.serviceName, calling GenerateHeadServiceName instead...", "RayCluster name", rayCluster.Name, "namespace", rayCluster.Namespace) + headSvcName, err = GenerateHeadServiceName(RayClusterCRD, rayCluster.Spec, rayCluster.Name) + if err != nil { + return err + } + } + + r.client = r.mgr.GetHTTPClient() + r.dashboardURL = fmt.Sprintf("%s/api/v1/namespaces/%s/services/%s:dashboard/proxy", r.mgr.GetConfig().Host, rayCluster.Namespace, headSvcName) + return nil + } + + r.client = &http.Client{ Timeout: 2 * time.Second, } + r.dashboardURL = "http://" + url + return nil } // UpdateDeployments update the deployments in the Ray cluster. 
@@ -135,7 +162,7 @@ func (r *RayDashboardClient) UpdateDeployments(ctx context.Context, configJson [ func (r *RayDashboardClient) GetMultiApplicationStatus(ctx context.Context) (map[string]*ServeApplicationStatus, error) { serveDetails, err := r.GetServeDetails(ctx) if err != nil { - return nil, fmt.Errorf("Failed to get serve details: %v", err) + return nil, fmt.Errorf("Failed to get serve details: %w", err) } return r.ConvertServeDetailsToApplicationStatuses(serveDetails) @@ -171,12 +198,12 @@ func (r *RayDashboardClient) GetServeDetails(ctx context.Context) (*ServeDetails func (r *RayDashboardClient) ConvertServeDetailsToApplicationStatuses(serveDetails *ServeDetails) (map[string]*ServeApplicationStatus, error) { detailsJson, err := json.Marshal(serveDetails.Applications) if err != nil { - return nil, fmt.Errorf("Failed to marshal serve details: %v.", serveDetails.Applications) + return nil, fmt.Errorf("Failed to marshal serve details: %v", serveDetails.Applications) } applicationStatuses := map[string]*ServeApplicationStatus{} if err = json.Unmarshal(detailsJson, &applicationStatuses); err != nil { - return nil, fmt.Errorf("Failed to unmarshal serve details bytes into map of application statuses: %v. Bytes: %s", err, string(detailsJson)) + return nil, fmt.Errorf("Failed to unmarshal serve details bytes into map of application statuses: %w. 
Bytes: %s", err, string(detailsJson)) } return applicationStatuses, nil @@ -188,29 +215,29 @@ type RuntimeEnvType map[string]interface{} // Reference to https://docs.ray.io/en/latest/cluster/running-applications/job-submission/rest.html#ray-job-rest-api-spec // Reference to https://github.com/ray-project/ray/blob/cfbf98c315cfb2710c56039a3c96477d196de049/dashboard/modules/job/pydantic_models.py#L38-L107 type RayJobInfo struct { + ErrorType *string `json:"error_type,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` + RuntimeEnv RuntimeEnvType `json:"runtime_env,omitempty"` JobStatus rayv1.JobStatus `json:"status,omitempty"` Entrypoint string `json:"entrypoint,omitempty"` JobId string `json:"job_id,omitempty"` SubmissionId string `json:"submission_id,omitempty"` Message string `json:"message,omitempty"` - ErrorType *string `json:"error_type,omitempty"` StartTime uint64 `json:"start_time,omitempty"` EndTime uint64 `json:"end_time,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - RuntimeEnv RuntimeEnvType `json:"runtime_env,omitempty"` } // RayJobRequest is the request body to submit. 
// Reference to https://docs.ray.io/en/latest/cluster/running-applications/job-submission/rest.html#ray-job-rest-api-spec // Reference to https://github.com/ray-project/ray/blob/cfbf98c315cfb2710c56039a3c96477d196de049/dashboard/modules/job/common.py#L325-L353 type RayJobRequest struct { - Entrypoint string `json:"entrypoint"` - SubmissionId string `json:"submission_id,omitempty"` RuntimeEnv RuntimeEnvType `json:"runtime_env,omitempty"` Metadata map[string]string `json:"metadata,omitempty"` + Resources map[string]float32 `json:"entrypoint_resources,omitempty"` + Entrypoint string `json:"entrypoint"` + SubmissionId string `json:"submission_id,omitempty"` NumCpus float32 `json:"entrypoint_num_cpus,omitempty"` NumGpus float32 `json:"entrypoint_num_gpus,omitempty"` - Resources map[string]float32 `json:"entrypoint_resources,omitempty"` } type RayJobResponse struct { @@ -319,6 +346,10 @@ func (r *RayDashboardClient) SubmitJobReq(ctx context.Context, request *RayJobRe body, _ := io.ReadAll(resp.Body) + if resp.StatusCode < 200 || resp.StatusCode > 299 { + return "", fmt.Errorf("SubmitJob fail: %s %s", resp.Status, string(body)) + } + var jobResp RayJobResponse if err = json.Unmarshal(body, &jobResp); err != nil { // Maybe body is not valid json, raise an error with the body. 
@@ -445,7 +476,7 @@ func UnmarshalRuntimeEnvYAML(runtimeEnvYAML string) (RuntimeEnvType, error) { var runtimeEnv RuntimeEnvType err := yaml.Unmarshal([]byte(runtimeEnvYAML), &runtimeEnv) if err != nil { - return nil, fmt.Errorf("failed to unmarshal RuntimeEnvYAML: %v: %v", runtimeEnvYAML, err) + return nil, fmt.Errorf("failed to unmarshal RuntimeEnvYAML: %v: %w", runtimeEnvYAML, err) } return runtimeEnv, nil } diff --git a/ray-operator/controllers/ray/utils/dashboard_httpclient_test.go b/ray-operator/controllers/ray/utils/dashboard_httpclient_test.go index 9e2e2e489c8..40f0e32e42a 100644 --- a/ray-operator/controllers/ray/utils/dashboard_httpclient_test.go +++ b/ray-operator/controllers/ray/utils/dashboard_httpclient_test.go @@ -51,14 +51,16 @@ var _ = Describe("RayFrameworkGenerator", func() { RuntimeEnvYAML: runtimeEnvStr, }, } + rayDashboardClient = &RayDashboardClient{} - rayDashboardClient.InitClient("127.0.0.1:8090") + err := rayDashboardClient.InitClient(context.Background(), "127.0.0.1:8090", nil) + Expect(err).ToNot(HaveOccurred()) }) It("Test ConvertRayJobToReq", func() { rayJobRequest, err := ConvertRayJobToReq(rayJob) - Expect(err).To(BeNil()) - Expect(len(rayJobRequest.RuntimeEnv)).To(Equal(4)) + Expect(err).ToNot(HaveOccurred()) + Expect(rayJobRequest.RuntimeEnv).To(HaveLen(4)) Expect(rayJobRequest.RuntimeEnv["working_dir"]).To(Equal("./")) }) @@ -70,7 +72,7 @@ var _ = Describe("RayFrameworkGenerator", func() { EntrypointNumGpus: 2.2, }, }) - Expect(err).To(BeNil()) + Expect(err).ToNot(HaveOccurred()) Expect(rayJobRequest.NumCpus).To(Equal(float32(1.1))) Expect(rayJobRequest.NumGpus).To(Equal(float32(2.2))) Expect(rayJobRequest.Resources).To(Equal(map[string]float32{"r1": 0.1, "r2": 0.2})) @@ -89,7 +91,7 @@ var _ = Describe("RayFrameworkGenerator", func() { httpmock.Activate() defer httpmock.DeactivateAndReset() httpmock.RegisterResponder("POST", rayDashboardClient.dashboardURL+JobPath, - func(req *http.Request) (*http.Response, error) { + func(_ 
*http.Request) (*http.Response, error) { body := &RayJobResponse{ JobId: expectJobId, } @@ -97,7 +99,7 @@ var _ = Describe("RayFrameworkGenerator", func() { return httpmock.NewBytesResponse(200, bodyBytes), nil }) httpmock.RegisterResponder("GET", rayDashboardClient.dashboardURL+JobPath+expectJobId, - func(req *http.Request) (*http.Response, error) { + func(_ *http.Request) (*http.Response, error) { body := &RayJobInfo{ JobStatus: rayv1.JobStatusRunning, Entrypoint: rayJob.Spec.Entrypoint, @@ -107,22 +109,22 @@ var _ = Describe("RayFrameworkGenerator", func() { return httpmock.NewBytesResponse(200, bodyBytes), nil }) httpmock.RegisterResponder("GET", rayDashboardClient.dashboardURL+JobPath+errorJobId, - func(req *http.Request) (*http.Response, error) { + func(_ *http.Request) (*http.Response, error) { // return a string in the body return httpmock.NewStringResponse(200, "Ray misbehaved and sent string, not JSON"), nil }) jobId, err := rayDashboardClient.SubmitJob(context.TODO(), rayJob) - Expect(err).To(BeNil()) + Expect(err).ToNot(HaveOccurred()) Expect(jobId).To(Equal(expectJobId)) rayJobInfo, err := rayDashboardClient.GetJobInfo(context.TODO(), jobId) - Expect(err).To(BeNil()) + Expect(err).ToNot(HaveOccurred()) Expect(rayJobInfo.Entrypoint).To(Equal(rayJob.Spec.Entrypoint)) Expect(rayJobInfo.JobStatus).To(Equal(rayv1.JobStatusRunning)) _, err = rayDashboardClient.GetJobInfo(context.TODO(), errorJobId) - Expect(err).NotTo(BeNil()) + Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("GetJobInfo fail")) Expect(err.Error()).To(ContainSubstring("Ray misbehaved")) }) @@ -131,7 +133,7 @@ var _ = Describe("RayFrameworkGenerator", func() { httpmock.Activate() defer httpmock.DeactivateAndReset() httpmock.RegisterResponder("POST", rayDashboardClient.dashboardURL+JobPath+"stop-job-1/stop", - func(req *http.Request) (*http.Response, error) { + func(_ *http.Request) (*http.Response, error) { body := &RayJobStopResponse{ Stopped: true, } @@ -140,7 +142,7 
@@ var _ = Describe("RayFrameworkGenerator", func() { }) err := rayDashboardClient.StopJob(context.TODO(), "stop-job-1") - Expect(err).To(BeNil()) + Expect(err).ToNot(HaveOccurred()) }) It("Test stop succeeded job", func() { @@ -148,7 +150,7 @@ var _ = Describe("RayFrameworkGenerator", func() { httpmock.Activate() defer httpmock.DeactivateAndReset() httpmock.RegisterResponder("POST", rayDashboardClient.dashboardURL+JobPath+"stop-job-1/stop", - func(req *http.Request) (*http.Response, error) { + func(_ *http.Request) (*http.Response, error) { body := &RayJobStopResponse{ Stopped: false, } @@ -156,7 +158,7 @@ var _ = Describe("RayFrameworkGenerator", func() { return httpmock.NewBytesResponse(200, bodyBytes), nil }) httpmock.RegisterResponder("GET", rayDashboardClient.dashboardURL+JobPath+"stop-job-1", - func(req *http.Request) (*http.Response, error) { + func(_ *http.Request) (*http.Response, error) { body := &RayJobInfo{ JobStatus: rayv1.JobStatusSucceeded, Entrypoint: rayJob.Spec.Entrypoint, @@ -167,6 +169,6 @@ var _ = Describe("RayFrameworkGenerator", func() { }) err := rayDashboardClient.StopJob(context.TODO(), "stop-job-1") - Expect(err).To(BeNil()) + Expect(err).ToNot(HaveOccurred()) }) }) diff --git a/ray-operator/controllers/ray/utils/fake_httpproxy_httpclient.go b/ray-operator/controllers/ray/utils/fake_httpproxy_httpclient.go index 4df3cabff08..ccd0f99bda6 100644 --- a/ray-operator/controllers/ray/utils/fake_httpproxy_httpclient.go +++ b/ray-operator/controllers/ray/utils/fake_httpproxy_httpclient.go @@ -1,28 +1,21 @@ package utils import ( + "context" "fmt" - "net/http" - "time" ) type FakeRayHttpProxyClient struct { - client http.Client - httpProxyURL string + IsHealthy bool } -func (r *FakeRayHttpProxyClient) InitClient() { - r.client = http.Client{ - Timeout: 20 * time.Millisecond, - } -} +func (fc *FakeRayHttpProxyClient) InitClient() {} -func (r *FakeRayHttpProxyClient) SetHostIp(hostIp string, port int) { - r.httpProxyURL = 
fmt.Sprintf("http://%s:%d", hostIp, port) -} +func (fc *FakeRayHttpProxyClient) SetHostIp(_, _, _ string, _ int) {} -func (r *FakeRayHttpProxyClient) CheckHealth() error { - // TODO: test check return error cases. - // Always return successful. +func (fc *FakeRayHttpProxyClient) CheckProxyActorHealth(_ context.Context) error { + if !fc.IsHealthy { + return fmt.Errorf("fake proxy actor is not healthy") + } return nil } diff --git a/ray-operator/controllers/ray/utils/fake_serve_httpclient.go b/ray-operator/controllers/ray/utils/fake_serve_httpclient.go index 7c84bf7e7a1..35de7ef866b 100644 --- a/ray-operator/controllers/ray/utils/fake_serve_httpclient.go +++ b/ray-operator/controllers/ray/utils/fake_serve_httpclient.go @@ -10,21 +10,21 @@ import ( ) type FakeRayDashboardClient struct { - BaseDashboardClient multiAppStatuses map[string]*ServeApplicationStatus - serveDetails ServeDetails - - GetJobInfoMock atomic.Pointer[func(context.Context, string) (*RayJobInfo, error)] + GetJobInfoMock atomic.Pointer[func(context.Context, string) (*RayJobInfo, error)] + BaseDashboardClient + serveDetails ServeDetails } var _ RayDashboardClientInterface = (*FakeRayDashboardClient)(nil) -func (r *FakeRayDashboardClient) InitClient(url string) { - r.client = http.Client{} +func (r *FakeRayDashboardClient) InitClient(_ context.Context, url string, _ *rayv1.RayCluster) error { + r.client = &http.Client{} r.dashboardURL = "http://" + url + return nil } -func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, configJson []byte) error { +func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, _ []byte) error { fmt.Print("UpdateDeployments fake succeeds.") return nil } @@ -59,23 +59,23 @@ func (r *FakeRayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, e return nil, nil } -func (r *FakeRayDashboardClient) SubmitJob(_ context.Context, rayJob *rayv1.RayJob) (jobId string, err error) { +func (r *FakeRayDashboardClient) SubmitJob(_ context.Context, _ 
*rayv1.RayJob) (jobId string, err error) { return "", nil } -func (r *FakeRayDashboardClient) SubmitJobReq(_ context.Context, request *RayJobRequest, name *string) (string, error) { +func (r *FakeRayDashboardClient) SubmitJobReq(_ context.Context, _ *RayJobRequest, _ *string) (string, error) { return "", nil } -func (r *FakeRayDashboardClient) GetJobLog(_ context.Context, jobName string) (*string, error) { +func (r *FakeRayDashboardClient) GetJobLog(_ context.Context, _ string) (*string, error) { lg := "log" return &lg, nil } -func (r *FakeRayDashboardClient) StopJob(_ context.Context, jobName string) (err error) { +func (r *FakeRayDashboardClient) StopJob(_ context.Context, _ string) (err error) { return nil } -func (r *FakeRayDashboardClient) DeleteJob(_ context.Context, jobName string) error { +func (r *FakeRayDashboardClient) DeleteJob(_ context.Context, _ string) error { return nil } diff --git a/ray-operator/controllers/ray/utils/httpproxy_httpclient.go b/ray-operator/controllers/ray/utils/httpproxy_httpclient.go index 9856c5d04f4..2436f8d4f1c 100644 --- a/ray-operator/controllers/ray/utils/httpproxy_httpclient.go +++ b/ray-operator/controllers/ray/utils/httpproxy_httpclient.go @@ -1,43 +1,58 @@ package utils import ( + "context" "fmt" "io" "net/http" "time" + + ctrl "sigs.k8s.io/controller-runtime" ) type RayHttpProxyClientInterface interface { InitClient() - CheckHealth() error - SetHostIp(hostIp string, port int) + CheckProxyActorHealth(ctx context.Context) error + SetHostIp(hostIp, podNamespace, podName string, port int) } -func GetRayHttpProxyClient() RayHttpProxyClientInterface { - return &RayHttpProxyClient{} +func GetRayHttpProxyClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() RayHttpProxyClientInterface { + return func() RayHttpProxyClientInterface { + return &RayHttpProxyClient{ + mgr: mgr, + useKubernetesProxy: useKubernetesProxy, + } + } } type RayHttpProxyClient struct { - client http.Client - httpProxyURL string + client *http.Client 
+ mgr ctrl.Manager + httpProxyURL string + useKubernetesProxy bool } func (r *RayHttpProxyClient) InitClient() { - r.client = http.Client{ - Timeout: 20 * time.Millisecond, + r.client = &http.Client{ + Timeout: 2 * time.Second, } } -func (r *RayHttpProxyClient) SetHostIp(hostIp string, port int) { +func (r *RayHttpProxyClient) SetHostIp(hostIp, podNamespace, podName string, port int) { + if r.useKubernetesProxy { + r.client = r.mgr.GetHTTPClient() + r.httpProxyURL = fmt.Sprintf("%s/api/v1/namespaces/%s/pods/%s:%d/proxy/", r.mgr.GetConfig().Host, podNamespace, podName, port) + } + r.httpProxyURL = fmt.Sprintf("http://%s:%d/", hostIp, port) } -func (r *RayHttpProxyClient) CheckHealth() error { - req, err := http.NewRequest("GET", r.httpProxyURL+RayServeProxyHealthPath, nil) +// CheckProxyActorHealth checks the health status of the Ray Serve proxy actor. +func (r *RayHttpProxyClient) CheckProxyActorHealth(ctx context.Context) error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, r.httpProxyURL+RayServeProxyHealthPath, nil) if err != nil { return err } - resp, err := r.client.Do(req) if err != nil { return err @@ -45,8 +60,9 @@ func (r *RayHttpProxyClient) CheckHealth() error { defer resp.Body.Close() body, _ := io.ReadAll(resp.Body) - if resp.StatusCode < 200 || resp.StatusCode > 299 { - return fmt.Errorf("RayHttpProxyClient CheckHealth fail: %s %s", resp.Status, string(body)) + if resp.StatusCode != 200 { + err := fmt.Errorf("CheckProxyActorHealth fails. 
status code: %d, status: %s, body: %s", resp.StatusCode, resp.Status, string(body)) + return err } return nil diff --git a/ray-operator/controllers/ray/utils/serve_api_models.go b/ray-operator/controllers/ray/utils/serve_api_models.go index 68671af910e..33d4129e3fb 100644 --- a/ray-operator/controllers/ray/utils/serve_api_models.go +++ b/ray-operator/controllers/ray/utils/serve_api_models.go @@ -15,10 +15,10 @@ type ServeDeploymentStatus struct { // Describes the status of an application type ServeApplicationStatus struct { + Deployments map[string]ServeDeploymentStatus `json:"deployments"` Name string `json:"name,omitempty"` Status string `json:"status"` Message string `json:"message,omitempty"` - Deployments map[string]ServeDeploymentStatus `json:"deployments"` } // V2 Serve API Response format. These extend the ServeDeploymentStatus and ServeApplicationStatus structs, @@ -30,10 +30,10 @@ type ServeDeploymentDetails struct { } type ServeApplicationDetails struct { - ServeApplicationStatus - RoutePrefix string `json:"route_prefix,omitempty"` - DocsPath string `json:"docs_path,omitempty"` Deployments map[string]ServeDeploymentDetails `json:"deployments"` + ServeApplicationStatus + RoutePrefix string `json:"route_prefix,omitempty"` + DocsPath string `json:"docs_path,omitempty"` } type ServeDetails struct { diff --git a/ray-operator/controllers/ray/utils/util.go b/ray-operator/controllers/ray/utils/util.go index dabcd865006..376a68dcf27 100644 --- a/ray-operator/controllers/ray/utils/util.go +++ b/ray-operator/controllers/ray/utils/util.go @@ -2,7 +2,7 @@ package utils import ( "context" - "crypto/sha1" + "crypto/sha1" //nolint:gosec // We are not using this for security purposes "encoding/base32" "fmt" "math" @@ -10,18 +10,22 @@ import ( "reflect" "strconv" "strings" - "time" "unicode" + "k8s.io/apimachinery/pkg/api/resource" + + "sigs.k8s.io/controller-runtime/pkg/manager" + batchv1 "k8s.io/api/batch/v1" "k8s.io/apimachinery/pkg/util/json" 
"k8s.io/apimachinery/pkg/util/rand" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) const ( @@ -68,6 +72,65 @@ func IsCreated(pod *corev1.Pod) bool { return pod.Status.Phase != "" } +func FindHeadPodReadyCondition(headPod *corev1.Pod) metav1.Condition { + headPodReadyCondition := metav1.Condition{ + Type: string(rayv1.HeadPodReady), + Status: metav1.ConditionFalse, + Reason: rayv1.UnknownReason, + } + + for _, cond := range headPod.Status.Conditions { + if cond.Type != corev1.PodReady { + continue + } + // Set the status based on the PodReady condition + headPodReadyCondition.Status = metav1.ConditionStatus(cond.Status) + headPodReadyCondition.Message = cond.Message + + // Determine the reason; default to HeadPodRunningAndReady if the headPod is ready but no specific reason is provided + reason := cond.Reason + if cond.Status == corev1.ConditionTrue && reason == "" { + reason = rayv1.HeadPodRunningAndReady + } + + // Update the reason if it's not empty + if reason != "" { + headPodReadyCondition.Reason = reason + } + + // Since we're only interested in the PodReady condition, break after processing it + break + } + return headPodReadyCondition +} + +// FindRayClusterSuspendStatus returns the current suspend status from two conditions: +// 1. rayv1.RayClusterSuspending +// 2. rayv1.RayClusterSuspended +// +// The two conditions should not be both True at the same time. The transition logic should be the following: +// +// rayv1.RayClusterSuspending: +// False by default +// False -> True: when `spec.Suspend` is true. +// True -> False: when all Pods are deleted, set rayv1.RayClusterSuspended from False to True. 
+// rayv1.RayClusterSuspended +// False by default +// False -> True: when suspending transitions from True to False +// True -> False: when `spec.Suspend` is false. +// +// If both rayv1.RayClusterSuspending and rayv1.RayClusterSuspended are False, FindRayClusterSuspendStatus returns "". +func FindRayClusterSuspendStatus(instance *rayv1.RayCluster) rayv1.RayClusterConditionType { + for _, cond := range instance.Status.Conditions { + if cond.Type == string(rayv1.RayClusterSuspending) || cond.Type == string(rayv1.RayClusterSuspended) { + if cond.Status == metav1.ConditionTrue { + return rayv1.RayClusterConditionType(cond.Type) + } + } + } + return "" +} + // IsRunningAndReady returns true if pod is in the PodRunning Phase, if it has a condition of PodReady. func IsRunningAndReady(pod *corev1.Pod) bool { if pod.Status.Phase != corev1.PodRunning { @@ -94,7 +157,7 @@ func CheckRouteName(ctx context.Context, s string, n string) string { if len(s) > maxLength { // shorten the name - log.Info(fmt.Sprintf("route name is too long: len = %v, we will shorten it to = %v\n", len(s), maxLength)) + log.Info("Route name is too long, we will shorten it to the max length", "nameLength", len(s), "maxLength", maxLength) s = s[:maxLength] } @@ -102,6 +165,21 @@ func CheckRouteName(ctx context.Context, s string, n string) string { return CheckName(s) } +// PodGenerateName returns the value that should be used for a Pod's generateName +// based on the RayCluster name and node type (head or worker). +func PodGenerateName(prefix string, nodeType rayv1.RayNodeType) string { + maxPrefixLength := 50 // 63 - (max(8,6) + 5 ) // 6 to 8 char are consumed at the end with "-head-" or -worker- + 5 generated. 
+ + var podPrefix string + if len(prefix) <= maxPrefixLength { + podPrefix = prefix + } else { + podPrefix = prefix[:maxPrefixLength] + } + + return strings.ToLower(podPrefix + DashSymbol + string(nodeType) + DashSymbol) +} + // CheckName makes sure the name does not start with a numeric value and the total length is < 63 char func CheckName(s string) string { maxLength := 50 // 63 - (max(8,6) + 5 ) // 6 to 8 char are consumed at the end with "-head-" or -worker- + 5 generated. @@ -244,9 +322,12 @@ func GetWorkerGroupDesiredReplicas(ctx context.Context, workerGroupSpec rayv1.Wo log := ctrl.LoggerFrom(ctx) // Always adhere to min/max replicas constraints. var workerReplicas int32 + if workerGroupSpec.Suspend != nil && *workerGroupSpec.Suspend { + return 0 + } if *workerGroupSpec.MinReplicas > *workerGroupSpec.MaxReplicas { - log.Info(fmt.Sprintf("minReplicas (%v) is greater than maxReplicas (%v), using maxReplicas as desired replicas. "+ - "Please fix this to avoid any unexpected behaviors.", *workerGroupSpec.MinReplicas, *workerGroupSpec.MaxReplicas)) + log.Info("minReplicas is greater than maxReplicas, using maxReplicas as desired replicas. "+ + "Please fix this to avoid any unexpected behaviors.", "minReplicas", *workerGroupSpec.MinReplicas, "maxReplicas", *workerGroupSpec.MaxReplicas) workerReplicas = *workerGroupSpec.MaxReplicas } else if workerGroupSpec.Replicas == nil || *workerGroupSpec.Replicas < *workerGroupSpec.MinReplicas { // Replicas is impossible to be nil as it has a default value assigned in the CRD. 
@@ -274,6 +355,9 @@ func CalculateDesiredReplicas(ctx context.Context, cluster *rayv1.RayCluster) in func CalculateMinReplicas(cluster *rayv1.RayCluster) int32 { count := int32(0) for _, nodeGroup := range cluster.Spec.WorkerGroupSpecs { + if nodeGroup.Suspend != nil && *nodeGroup.Suspend { + continue + } count += *nodeGroup.MinReplicas } @@ -284,12 +368,31 @@ func CalculateMinReplicas(cluster *rayv1.RayCluster) int32 { func CalculateMaxReplicas(cluster *rayv1.RayCluster) int32 { count := int32(0) for _, nodeGroup := range cluster.Spec.WorkerGroupSpecs { + if nodeGroup.Suspend != nil && *nodeGroup.Suspend { + continue + } count += *nodeGroup.MaxReplicas } return count } +// CalculateReadyReplicas calculates ready worker replicas at the cluster level +// A worker is ready if its Pod has a PodCondition with type == Ready and status == True +func CalculateReadyReplicas(pods corev1.PodList) int32 { + count := int32(0) + for _, pod := range pods.Items { + if val, ok := pod.Labels[RayNodeTypeLabelKey]; !ok || val != string(rayv1.WorkerNode) { + continue + } + if IsRunningAndReady(&pod) { + count++ + } + } + + return count +} + // CalculateAvailableReplicas calculates available worker replicas at the cluster level // A worker is available if its Pod is running func CalculateAvailableReplicas(pods corev1.PodList) int32 { @@ -308,10 +411,13 @@ func CalculateAvailableReplicas(pods corev1.PodList) int32 { func CalculateDesiredResources(cluster *rayv1.RayCluster) corev1.ResourceList { desiredResourcesList := []corev1.ResourceList{{}} - headPodResource := calculatePodResource(cluster.Spec.HeadGroupSpec.Template.Spec) + headPodResource := CalculatePodResource(cluster.Spec.HeadGroupSpec.Template.Spec) desiredResourcesList = append(desiredResourcesList, headPodResource) for _, nodeGroup := range cluster.Spec.WorkerGroupSpecs { - podResource := calculatePodResource(nodeGroup.Template.Spec) + if nodeGroup.Suspend != nil && *nodeGroup.Suspend { + continue + } + podResource := 
CalculatePodResource(nodeGroup.Template.Spec) for i := int32(0); i < *nodeGroup.Replicas; i++ { desiredResourcesList = append(desiredResourcesList, podResource) } @@ -321,10 +427,10 @@ func CalculateDesiredResources(cluster *rayv1.RayCluster) corev1.ResourceList { func CalculateMinResources(cluster *rayv1.RayCluster) corev1.ResourceList { minResourcesList := []corev1.ResourceList{{}} - headPodResource := calculatePodResource(cluster.Spec.HeadGroupSpec.Template.Spec) + headPodResource := CalculatePodResource(cluster.Spec.HeadGroupSpec.Template.Spec) minResourcesList = append(minResourcesList, headPodResource) for _, nodeGroup := range cluster.Spec.WorkerGroupSpecs { - podResource := calculatePodResource(nodeGroup.Template.Spec) + podResource := CalculatePodResource(nodeGroup.Template.Spec) for i := int32(0); i < *nodeGroup.MinReplicas; i++ { minResourcesList = append(minResourcesList, podResource) } @@ -332,12 +438,15 @@ func CalculateMinResources(cluster *rayv1.RayCluster) corev1.ResourceList { return sumResourceList(minResourcesList) } -// calculatePodResource returns the total resources of a pod. +// CalculatePodResource returns the total resources of a pod. // Request values take precedence over limit values. 
-func calculatePodResource(podSpec corev1.PodSpec) corev1.ResourceList { +func CalculatePodResource(podSpec corev1.PodSpec) corev1.ResourceList { podResource := corev1.ResourceList{} for _, container := range podSpec.Containers { containerResource := container.Resources.Requests + if containerResource == nil { + containerResource = corev1.ResourceList{} + } for name, quantity := range container.Resources.Limits { if _, ok := containerResource[name]; !ok { containerResource[name] = quantity @@ -355,6 +464,14 @@ func calculatePodResource(podSpec corev1.PodSpec) corev1.ResourceList { return podResource } +func ConvertResourceListToMapString(resourceList corev1.ResourceList) map[string]resource.Quantity { + result := make(map[string]resource.Quantity) + for key, value := range resourceList { + result[string(key)] = value + } + return result +} + func sumResourceList(list []corev1.ResourceList) corev1.ResourceList { totalResource := corev1.ResourceList{} for _, l := range list { @@ -398,12 +515,12 @@ func CheckAllPodsRunning(ctx context.Context, runningPods corev1.PodList) bool { } for _, pod := range runningPods.Items { if pod.Status.Phase != corev1.PodRunning { - log.Info(fmt.Sprintf("CheckAllPodsRunning: Pod is not running; Pod Name: %s; Pod Status.Phase: %v", pod.Name, pod.Status.Phase)) + log.Info("CheckAllPodsRunning: Pod is not running.", "podName", pod.Name, "pod Status.Phase", pod.Status.Phase) return false } for _, cond := range pod.Status.Conditions { if cond.Type == corev1.PodReady && cond.Status != corev1.ConditionTrue { - log.Info(fmt.Sprintf("CheckAllPodsRunning: Pod is not ready; Pod Name: %s; Pod Status.Conditions[PodReady]: %v", pod.Name, cond)) + log.Info("CheckAllPodsRunning: Pod is not ready.", "podName", pod.Name, "pod Status.Conditions[PodReady]", cond) return false } } @@ -411,65 +528,6 @@ func CheckAllPodsRunning(ctx context.Context, runningPods corev1.PodList) bool { return true } -func PodNotMatchingTemplate(pod corev1.Pod, template 
corev1.PodTemplateSpec) bool { - if pod.Status.Phase == corev1.PodRunning && pod.ObjectMeta.DeletionTimestamp == nil { - if len(template.Spec.Containers) != len(pod.Spec.Containers) { - return true - } - cmap := map[string]*corev1.Container{} - for _, container := range pod.Spec.Containers { - cmap[container.Name] = &container - } - for _, container1 := range template.Spec.Containers { - if container2, ok := cmap[container1.Name]; ok { - if container1.Image != container2.Image { - // image name do not match - return true - } - if len(container1.Resources.Requests) != len(container2.Resources.Requests) || - len(container1.Resources.Limits) != len(container2.Resources.Limits) { - // resource entries do not match - return true - } - - resources1 := []corev1.ResourceList{ - container1.Resources.Requests, - container1.Resources.Limits, - } - resources2 := []corev1.ResourceList{ - container2.Resources.Requests, - container2.Resources.Limits, - } - for i := range resources1 { - // we need to make sure all fields match - for name, quantity1 := range resources1[i] { - if quantity2, ok := resources2[i][name]; ok { - if quantity1.Cmp(quantity2) != 0 { - // request amount does not match - return true - } - } else { - // no such request - return true - } - } - } - - // now we consider them equal - delete(cmap, container1.Name) - } else { - // container name do not match - return true - } - } - if len(cmap) != 0 { - // one or more containers do not match - return true - } - } - return false -} - // CompareJsonStruct This is a way to better compare if two objects are the same when they are json/yaml structs. reflect.DeepEqual will fail in some cases. 
func CompareJsonStruct(objA interface{}, objB interface{}) bool { a, err := json.Marshal(objA) @@ -492,14 +550,6 @@ func CompareJsonStruct(objA interface{}, objB interface{}) bool { return reflect.DeepEqual(v1, v2) } -func ConvertUnixTimeToMetav1Time(unixTime uint64) *metav1.Time { - // The Ray jobInfo returns the start_time, which is a unix timestamp in milliseconds. - // https://docs.ray.io/en/latest/cluster/jobs-package-ref.html#jobinfo - t := time.Unix(int64(unixTime)/1000, int64(unixTime)%1000*1000000) - kt := metav1.NewTime(t) - return &kt -} - // Json-serializes obj and returns its hash string func GenerateJsonHash(obj interface{}) (string, error) { serialObj, err := json.Marshal(obj) @@ -507,10 +557,10 @@ func GenerateJsonHash(obj interface{}) (string, error) { return "", err } - hashBytes := sha1.Sum(serialObj) + hashBytes := sha1.Sum(serialObj) //nolint:gosec // We are not using this for security purposes // Convert to an ASCII string - hashStr := string(base32.HexEncoding.EncodeToString(hashBytes[:])) + hashStr := base32.HexEncoding.EncodeToString(hashBytes[:]) return hashStr, nil } @@ -558,3 +608,34 @@ func EnvVarByName(envName string, envVars []corev1.EnvVar) (corev1.EnvVar, bool) } return corev1.EnvVar{}, false } + +type ClientProvider interface { + GetDashboardClient(mgr manager.Manager) func() RayDashboardClientInterface + GetHttpProxyClient(mgr manager.Manager) func() RayHttpProxyClientInterface +} + +func ManagedByExternalController(controllerName *string) *string { + if controllerName != nil && *controllerName != KubeRayController { + return controllerName + } + return nil +} + +func IsAutoscalingEnabled[T *rayv1.RayCluster | *rayv1.RayJob | *rayv1.RayService](obj T) bool { + switch obj := (interface{})(obj).(type) { + case *rayv1.RayCluster: + return obj.Spec.EnableInTreeAutoscaling != nil && *obj.Spec.EnableInTreeAutoscaling + case *rayv1.RayJob: + return obj.Spec.RayClusterSpec != nil && obj.Spec.RayClusterSpec.EnableInTreeAutoscaling != nil 
&& *obj.Spec.RayClusterSpec.EnableInTreeAutoscaling + case *rayv1.RayService: + return obj.Spec.RayClusterSpec.EnableInTreeAutoscaling != nil && *obj.Spec.RayClusterSpec.EnableInTreeAutoscaling + default: + panic(fmt.Sprintf("unsupported type: %T", obj)) + } +} + +// Check if the RayCluster has GCS fault tolerance enabled. +func IsGCSFaultToleranceEnabled(instance rayv1.RayCluster) bool { + v, ok := instance.Annotations[RayFTEnabledAnnotationKey] + return (ok && strings.ToLower(v) == "true") || instance.Spec.GcsFaultToleranceOptions != nil +} diff --git a/ray-operator/controllers/ray/utils/util_test.go b/ray-operator/controllers/ray/utils/util_test.go index 63e59e5a810..47367d5920e 100644 --- a/ray-operator/controllers/ray/utils/util_test.go +++ b/ray-operator/controllers/ray/utils/util_test.go @@ -2,15 +2,16 @@ package utils import ( "context" + "errors" "testing" "github.com/stretchr/testify/assert" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" corev1 "k8s.io/api/core/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" ) func TestGetClusterDomainName(t *testing.T) { @@ -96,25 +97,88 @@ func TestCheckAllPodsRunning(t *testing.T) { } } -func TestCheckName(t *testing.T) { - // test 1 -> change - str := "72fbcc7e-a661-4b18e-ca41-e903-fc3ae634b18e-lazer090scholar-director-s" - str = CheckName(str) - if str != "rca41-e903-fc3ae634b18e-lazer090scholar-director-s" { - t.Fail() +func TestPodGenerateName(t *testing.T) { + tests := []struct { + name string + prefix string + nodeType rayv1.RayNodeType + expected string + }{ + { + name: "short cluster name, head pod", + prefix: "ray-cluster-01", + nodeType: rayv1.HeadNode, + expected: "ray-cluster-01-head-", + }, + { + name: "short cluster name, worker pod", + prefix: "ray-cluster-group-name-01", + nodeType: rayv1.WorkerNode, + expected: 
"ray-cluster-group-name-01-worker-", + }, + { + name: "long cluster name, head pod", + prefix: "ray-cluster-0000000000000000000000011111111122222233333333333333", + nodeType: rayv1.HeadNode, + expected: "ray-cluster-00000000000000000000000111111111222222-head-", + }, + { + name: "long cluster name, worker pod", + prefix: "ray-cluster-0000000000000000000000011111111122222233333333333333-group-name", + nodeType: rayv1.WorkerNode, + expected: "ray-cluster-00000000000000000000000111111111222222-worker-", + }, } - // test 2 -> change - str = "--------566666--------444433-----------222222----------4444" - str = CheckName(str) - if str != "r6666--------444433-----------222222----------4444" { - t.Fail() + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + str := PodGenerateName(test.prefix, test.nodeType) + if str != test.expected { + t.Logf("expected: %q", test.expected) + t.Logf("actual: %q", str) + t.Error("PodGenerateName returned an unexpected string") + } + + // 63 (max pod name length) - 5 random hexadecimal characters from generateName + if len(str) > 58 { + t.Error("Generated pod name is too long") + } + }) } +} - // test 3 -> keep - str = "acceptable-name-head-12345" - str = CheckName(str) - if str != "acceptable-name-head-12345" { - t.Fail() +func TestCheckName(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "shorten long string starting with numeric character", + input: "72fbcc7e-a661-4b18e-ca41-e903-fc3ae634b18e-lazer090scholar-director-s", + expected: "rca41-e903-fc3ae634b18e-lazer090scholar-director-s", + }, + { + name: "shorten long string starting with special character", + input: "--------566666--------444433-----------222222----------4444", + expected: "r6666--------444433-----------222222----------4444", + }, + { + name: "unchanged", + input: "acceptable-name-head-12345", + expected: "acceptable-name-head-12345", + }, + } + + for _, test := range tests { + t.Run(test.name, 
func(t *testing.T) { + str := CheckName(test.input) + if str != test.expected { + t.Logf("expected: %q", test.expected) + t.Logf("actual: %q", str) + t.Error("CheckName returned an unexpected string") + } + }) } } @@ -201,6 +265,31 @@ func createSomePodWithCondition(typ corev1.PodConditionType, status corev1.Condi } } +func createRayHeadPodWithPhaseAndCondition(phase corev1.PodPhase, typ corev1.PodConditionType, status corev1.ConditionStatus) (pod *corev1.Pod) { + return &corev1.Pod{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Pod", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "raycluster-sample-head", + Namespace: "default", + Labels: map[string]string{ + "ray.io/node-type": string(rayv1.HeadNode), + }, + }, + Status: corev1.PodStatus{ + Phase: phase, + Conditions: []corev1.PodCondition{ + { + Type: typ, + Status: status, + }, + }, + }, + } +} + func TestGetHeadGroupServiceAccountName(t *testing.T) { tests := map[string]struct { input *rayv1.RayCluster @@ -252,181 +341,6 @@ func TestGetHeadGroupServiceAccountName(t *testing.T) { } } -func TestReconcile_CheckNeedRemoveOldPod(t *testing.T) { - namespaceStr := "default" - - headTemplate := corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "ray-head", - Image: "rayproject/autoscaler", - Command: []string{"python"}, - Args: []string{"/opt/code.py"}, - Env: []corev1.EnvVar{ - { - Name: "MY_POD_IP", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "status.podIP", - }, - }, - }, - }, - }, - }, - }, - } - - pod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "headNode", - Namespace: namespaceStr, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "ray-head", - Image: "rayproject/autoscaler", - Command: []string{"python"}, - Args: []string{"/opt/code.py"}, - }, - }, - }, - Status: corev1.PodStatus{ - Phase: corev1.PodRunning, - }, - } - - assert.Equal(t, PodNotMatchingTemplate(pod, 
headTemplate), false, "expect template & pod matching") - - pod.Spec.Containers = []corev1.Container{ - { - Name: "ray-head", - Image: "rayproject/autoscaler", - Command: []string{"python"}, - Args: []string{"/opt/code.py"}, - }, - { - Name: "ray-head", - Image: "rayproject/autoscaler", - Command: []string{"python"}, - Args: []string{"/opt/code.py"}, - }, - } - - assert.Equal(t, PodNotMatchingTemplate(pod, headTemplate), true, "expect template & pod with 2 containers not matching") - - workerTemplate := corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "ray-worker", - Image: "rayproject/autoscaler", - Command: []string{"echo"}, - Args: []string{"Hello Ray"}, - Env: []corev1.EnvVar{ - { - Name: "MY_POD_IP", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "status.podIP", - }, - }, - }, - }, - }, - }, - }, - } - - pod = corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pod1", - Namespace: namespaceStr, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "ray-worker", - Image: "rayproject/autoscaler", - Command: []string{"echo"}, - Args: []string{"Hello Ray"}, - }, - }, - }, - Status: corev1.PodStatus{ - Phase: corev1.PodRunning, - }, - } - - assert.Equal(t, PodNotMatchingTemplate(pod, workerTemplate), false, "expect template & pod matching") - - workerTemplate = corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "ray-worker", - Image: "rayproject/autoscaler", - Command: []string{"echo"}, - Args: []string{"Hello Ray"}, - Resources: corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("500m"), - corev1.ResourceMemory: resource.MustParse("512Mi"), - }, - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("256m"), - corev1.ResourceMemory: resource.MustParse("256Mi"), - }, - }, - }, - }, - }, - } - - pod = corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - 
Name: "pod1", - Namespace: namespaceStr, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "ray-worker", - Image: "rayproject/autoscaler", - Command: []string{"echo"}, - Args: []string{"Hello Ray"}, - Resources: corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("500m"), - corev1.ResourceMemory: resource.MustParse("512Mi"), - }, - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("256m"), - corev1.ResourceMemory: resource.MustParse("256Mi"), - }, - }, - }, - }, - }, - Status: corev1.PodStatus{ - Phase: corev1.PodRunning, - }, - } - - assert.Equal(t, PodNotMatchingTemplate(pod, workerTemplate), false, "expect template & pod matching") - - pod.Spec.Containers[0].Resources.Limits[corev1.ResourceCPU] = resource.MustParse("50m") - - assert.Equal(t, PodNotMatchingTemplate(pod, workerTemplate), true, "expect template & pod not matching") - - pod.Spec.Containers[0].Resources.Limits[corev1.ResourceCPU] = resource.MustParse("500m") - pod.Spec.Containers[0].Resources.Requests[corev1.ResourceCPU] = resource.MustParse("250m") - - assert.Equal(t, PodNotMatchingTemplate(pod, workerTemplate), true, "expect template & pod not matching") -} - func TestCalculateAvailableReplicas(t *testing.T) { podList := corev1.PodList{ Items: []corev1.Pod{ @@ -450,6 +364,12 @@ func TestCalculateAvailableReplicas(t *testing.T) { }, Status: corev1.PodStatus{ Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, }, }, { @@ -461,6 +381,12 @@ func TestCalculateAvailableReplicas(t *testing.T) { }, Status: corev1.PodStatus{ Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionFalse, + }, + }, }, }, { @@ -476,8 +402,12 @@ func TestCalculateAvailableReplicas(t *testing.T) { }, }, } - count := CalculateAvailableReplicas(podList) - assert.Equal(t, count, int32(1), "expect 
1 available replica") + + availableCount := CalculateAvailableReplicas(podList) + assert.Equal(t, availableCount, int32(1), "expect 1 available replica") + + readyCount := CalculateReadyReplicas(podList) + assert.Equal(t, readyCount, int32(1), "expect 1 ready replica") } func TestFindContainerPort(t *testing.T) { @@ -583,6 +513,57 @@ func TestGetWorkerGroupDesiredReplicas(t *testing.T) { workerGroupSpec.MinReplicas = &maxReplicas workerGroupSpec.MaxReplicas = &minReplicas assert.Equal(t, GetWorkerGroupDesiredReplicas(ctx, workerGroupSpec), *workerGroupSpec.MaxReplicas) + + // Test 6: `WorkerGroupSpec.Suspend` is true. + suspend := true + workerGroupSpec.MinReplicas = &maxReplicas + workerGroupSpec.MaxReplicas = &minReplicas + workerGroupSpec.Suspend = &suspend + assert.Equal(t, GetWorkerGroupDesiredReplicas(ctx, workerGroupSpec), int32(0)) +} + +func TestCalculateMinReplicas(t *testing.T) { + // Test 1 + minReplicas := int32(1) + rayCluster := &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ + { + MinReplicas: &minReplicas, + }, + }, + }, + } + assert.Equal(t, CalculateMinReplicas(rayCluster), minReplicas) + + // Test 2 + suspend := true + for i := range rayCluster.Spec.WorkerGroupSpecs { + rayCluster.Spec.WorkerGroupSpecs[i].Suspend = &suspend + } + assert.Equal(t, CalculateMinReplicas(rayCluster), int32(0)) +} + +func TestCalculateMaxReplicas(t *testing.T) { + // Test 1 + maxReplicas := int32(1) + rayCluster := &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ + { + MaxReplicas: &maxReplicas, + }, + }, + }, + } + assert.Equal(t, CalculateMaxReplicas(rayCluster), maxReplicas) + + // Test 2 + suspend := true + for i := range rayCluster.Spec.WorkerGroupSpecs { + rayCluster.Spec.WorkerGroupSpecs[i].Suspend = &suspend + } + assert.Equal(t, CalculateMaxReplicas(rayCluster), int32(0)) } func TestCalculateDesiredReplicas(t *testing.T) { @@ -597,29 +578,29 @@ func 
TestCalculateDesiredReplicas(t *testing.T) { }{ "Both groups' Replicas are nil": { group1Replicas: nil, - group1MinReplicas: pointer.Int32(1), - group1MaxReplicas: pointer.Int32(5), + group1MinReplicas: ptr.To[int32](1), + group1MaxReplicas: ptr.To[int32](5), group2Replicas: nil, - group2MinReplicas: pointer.Int32(2), - group2MaxReplicas: pointer.Int32(5), + group2MinReplicas: ptr.To[int32](2), + group2MaxReplicas: ptr.To[int32](5), answer: 3, }, "Group1's Replicas is smaller than MinReplicas, and Group2's Replicas is more than MaxReplicas.": { - group1Replicas: pointer.Int32(0), - group1MinReplicas: pointer.Int32(2), - group1MaxReplicas: pointer.Int32(5), - group2Replicas: pointer.Int32(6), - group2MinReplicas: pointer.Int32(2), - group2MaxReplicas: pointer.Int32(5), + group1Replicas: ptr.To[int32](0), + group1MinReplicas: ptr.To[int32](2), + group1MaxReplicas: ptr.To[int32](5), + group2Replicas: ptr.To[int32](6), + group2MinReplicas: ptr.To[int32](2), + group2MaxReplicas: ptr.To[int32](5), answer: 7, }, "Group1's Replicas is more than MaxReplicas.": { - group1Replicas: pointer.Int32(6), - group1MinReplicas: pointer.Int32(2), - group1MaxReplicas: pointer.Int32(5), - group2Replicas: pointer.Int32(3), - group2MinReplicas: pointer.Int32(2), - group2MaxReplicas: pointer.Int32(5), + group1Replicas: ptr.To[int32](6), + group1MinReplicas: ptr.To[int32](2), + group1MaxReplicas: ptr.To[int32](5), + group2Replicas: ptr.To[int32](3), + group2MinReplicas: ptr.To[int32](2), + group2MaxReplicas: ptr.To[int32](5), answer: 8, }, } @@ -682,3 +663,159 @@ env_vars: }) } } + +func TestFindHeadPodReadyCondition(t *testing.T) { + tests := map[string]struct { + pod *corev1.Pod + expected metav1.Condition + }{ + "condition true if Ray head pod is running and ready": { + pod: createRayHeadPodWithPhaseAndCondition(corev1.PodRunning, corev1.PodReady, corev1.ConditionTrue), + expected: metav1.Condition{ + Type: string(rayv1.HeadPodReady), + Status: metav1.ConditionTrue, + }, + }, + 
"condition false if Ray head pod is not running": { + pod: createRayHeadPodWithPhaseAndCondition(corev1.PodPending, corev1.PodReady, corev1.ConditionFalse), + expected: metav1.Condition{ + Type: string(rayv1.HeadPodReady), + Status: metav1.ConditionFalse, + }, + }, + "condition false if Ray head pod is not ready": { + pod: createRayHeadPodWithPhaseAndCondition(corev1.PodRunning, corev1.PodReady, corev1.ConditionFalse), + expected: metav1.Condition{ + Type: string(rayv1.HeadPodReady), + Status: metav1.ConditionFalse, + }, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + headPodReadyCondition := FindHeadPodReadyCondition(tc.pod) + assert.Equal(t, tc.expected.Status, headPodReadyCondition.Status) + }) + } +} + +func TestErrRayClusterReplicaFailureReason(t *testing.T) { + assert.Equal(t, RayClusterReplicaFailureReason(ErrFailedDeleteAllPods), "FailedDeleteAllPods") + assert.Equal(t, RayClusterReplicaFailureReason(ErrFailedDeleteHeadPod), "FailedDeleteHeadPod") + assert.Equal(t, RayClusterReplicaFailureReason(ErrFailedCreateHeadPod), "FailedCreateHeadPod") + assert.Equal(t, RayClusterReplicaFailureReason(ErrFailedDeleteWorkerPod), "FailedDeleteWorkerPod") + assert.Equal(t, RayClusterReplicaFailureReason(ErrFailedCreateWorkerPod), "FailedCreateWorkerPod") + assert.Equal(t, RayClusterReplicaFailureReason(errors.Join(ErrFailedDeleteAllPods, errors.New("other error"))), "FailedDeleteAllPods") + assert.Equal(t, RayClusterReplicaFailureReason(errors.Join(ErrFailedDeleteHeadPod, errors.New("other error"))), "FailedDeleteHeadPod") + assert.Equal(t, RayClusterReplicaFailureReason(errors.Join(ErrFailedCreateHeadPod, errors.New("other error"))), "FailedCreateHeadPod") + assert.Equal(t, RayClusterReplicaFailureReason(errors.Join(ErrFailedDeleteWorkerPod, errors.New("other error"))), "FailedDeleteWorkerPod") + assert.Equal(t, RayClusterReplicaFailureReason(errors.Join(ErrFailedCreateWorkerPod, errors.New("other error"))), "FailedCreateWorkerPod") + 
assert.Equal(t, RayClusterReplicaFailureReason(errors.New("other error")), "") +} + +func TestIsAutoscalingEnabled(t *testing.T) { + // Test: RayCluster + cluster := &rayv1.RayCluster{} + assert.False(t, IsAutoscalingEnabled(cluster)) + + cluster = &rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + EnableInTreeAutoscaling: ptr.To[bool](true), + }, + } + assert.True(t, IsAutoscalingEnabled(cluster)) + + // Test: RayJob + job := &rayv1.RayJob{} + assert.False(t, IsAutoscalingEnabled(job)) + + job = &rayv1.RayJob{ + Spec: rayv1.RayJobSpec{ + RayClusterSpec: &rayv1.RayClusterSpec{ + EnableInTreeAutoscaling: ptr.To[bool](true), + }, + }, + } + assert.True(t, IsAutoscalingEnabled(job)) + + // Test: RayService + service := &rayv1.RayService{} + assert.False(t, IsAutoscalingEnabled(service)) + + service = &rayv1.RayService{ + Spec: rayv1.RayServiceSpec{ + RayClusterSpec: rayv1.RayClusterSpec{ + EnableInTreeAutoscaling: ptr.To[bool](true), + }, + }, + } + assert.True(t, IsAutoscalingEnabled(service)) +} + +func TestIsGCSFaultToleranceEnabled(t *testing.T) { + tests := []struct { + name string + instance rayv1.RayCluster + expected bool + }{ + { + name: "ray.io/ft-enabled is true", + instance: rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + RayFTEnabledAnnotationKey: "true", + }, + }, + }, + expected: true, + }, + { + name: "ray.io/ft-enabled is not set and GcsFaultToleranceOptions is set", + instance: rayv1.RayCluster{ + Spec: rayv1.RayClusterSpec{ + GcsFaultToleranceOptions: &rayv1.GcsFaultToleranceOptions{}, + }, + }, + expected: true, + }, + { + name: "ray.io/ft-enabled is false", + instance: rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + RayFTEnabledAnnotationKey: "false", + }, + }, + }, + expected: false, + }, + { + name: "ray.io/ft-enabled is not set and GcsFaultToleranceOptions is not set", + instance: rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{}, + 
}, + }, + expected: false, + }, + { + name: "ray.io/ft-enabled is using uppercase true", + instance: rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + RayFTEnabledAnnotationKey: "TRUE", + }, + }, + }, + expected: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + result := IsGCSFaultToleranceEnabled(test.instance) + assert.Equal(t, test.expected, result) + }) + } +} diff --git a/ray-operator/controllers/ray/utils/validation.go b/ray-operator/controllers/ray/utils/validation.go new file mode 100644 index 00000000000..7c3e7fe10c2 --- /dev/null +++ b/ray-operator/controllers/ray/utils/validation.go @@ -0,0 +1,18 @@ +package utils + +import ( + errstd "errors" + + "k8s.io/apimachinery/pkg/api/meta" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +func ValidateRayClusterStatus(instance *rayv1.RayCluster) error { + suspending := meta.IsStatusConditionTrue(instance.Status.Conditions, string(rayv1.RayClusterSuspending)) + suspended := meta.IsStatusConditionTrue(instance.Status.Conditions, string(rayv1.RayClusterSuspended)) + if suspending && suspended { + return errstd.New("invalid RayCluster State: rayv1.RayClusterSuspending and rayv1.RayClusterSuspended conditions should not be both true") + } + return nil +} diff --git a/ray-operator/controllers/ray/utils/validation_test.go b/ray-operator/controllers/ray/utils/validation_test.go new file mode 100644 index 00000000000..3468cc9d2dc --- /dev/null +++ b/ray-operator/controllers/ray/utils/validation_test.go @@ -0,0 +1,88 @@ +package utils + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +func TestValidateRayClusterStatus(t *testing.T) { + tests := []struct { + name string + conditions []metav1.Condition + expectError bool + }{ + { + name: "Both suspending and suspended are true", + conditions: []metav1.Condition{ + { + Type: 
string(rayv1.RayClusterSuspending), + Status: metav1.ConditionTrue, + }, + { + Type: string(rayv1.RayClusterSuspended), + Status: metav1.ConditionTrue, + }, + }, + expectError: true, + }, + { + name: "Only suspending is true", + conditions: []metav1.Condition{ + { + Type: string(rayv1.RayClusterSuspending), + Status: metav1.ConditionTrue, + }, + { + Type: string(rayv1.RayClusterSuspended), + Status: metav1.ConditionFalse, + }, + }, + expectError: false, + }, + { + name: "Only suspended is true", + conditions: []metav1.Condition{ + { + Type: string(rayv1.RayClusterSuspending), + Status: metav1.ConditionFalse, + }, + { + Type: string(rayv1.RayClusterSuspended), + Status: metav1.ConditionTrue, + }, + }, + expectError: false, + }, + { + name: "Both suspending and suspended are false", + conditions: []metav1.Condition{ + { + Type: string(rayv1.RayClusterSuspending), + Status: metav1.ConditionFalse, + }, + { + Type: string(rayv1.RayClusterSuspended), + Status: metav1.ConditionFalse, + }, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + instance := &rayv1.RayCluster{ + Status: rayv1.RayClusterStatus{ + Conditions: tt.conditions, + }, + } + err := ValidateRayClusterStatus(instance) + if (err != nil) != tt.expectError { + t.Errorf("ValidateRayClusterStatus() error = %v, wantErr %v", err, tt.expectError) + } + }) + } +} diff --git a/ray-operator/go.mod b/ray-operator/go.mod index 051b633e389..b41b5f3cdc6 100644 --- a/ray-operator/go.mod +++ b/ray-operator/go.mod @@ -1,33 +1,39 @@ module github.com/ray-project/kuberay/ray-operator -go 1.20 +go 1.22.0 + +toolchain go1.22.4 + +replace github.com/imdario/mergo => github.com/imdario/mergo v0.3.16 require ( - github.com/Masterminds/semver/v3 v3.2.0 - github.com/go-logr/logr v1.2.4 - github.com/go-logr/zapr v1.2.4 + github.com/Masterminds/semver/v3 v3.2.1 + github.com/go-logr/logr v1.4.2 + github.com/go-logr/zapr v1.3.0 github.com/google/shlex 
v0.0.0-20191202100458-e7afc7fbc510 github.com/jarcoal/httpmock v1.2.0 - github.com/onsi/ginkgo/v2 v2.11.0 - github.com/onsi/gomega v1.27.10 - github.com/openshift/api v0.0.0-20211209135129-c58d9f695577 + github.com/onsi/ginkgo/v2 v2.17.2 + github.com/onsi/gomega v1.33.1 + github.com/openshift/api v0.0.0-20240625084701-0689f006bcde github.com/orcaman/concurrent-map/v2 v2.0.1 github.com/pkg/errors v0.9.1 - github.com/prometheus/client_golang v1.16.0 - github.com/stretchr/testify v1.8.4 - go.uber.org/zap v1.25.0 + github.com/prometheus/client_golang v1.19.1 + github.com/stretchr/testify v1.9.0 + go.uber.org/zap v1.27.0 gopkg.in/natefinch/lumberjack.v2 v2.2.1 - k8s.io/api v0.28.4 - k8s.io/apiextensions-apiserver v0.28.4 - k8s.io/apimachinery v0.28.4 - k8s.io/apiserver v0.28.4 - k8s.io/client-go v0.28.4 - k8s.io/code-generator v0.28.4 - k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 - sigs.k8s.io/controller-runtime v0.16.3 - sigs.k8s.io/structured-merge-diff/v4 v4.2.3 - sigs.k8s.io/yaml v1.3.0 - volcano.sh/apis v1.6.0-alpha.0.0.20221012070524-685db38b4fae + k8s.io/api v0.30.2 + k8s.io/apiextensions-apiserver v0.29.6 + k8s.io/apimachinery v0.30.2 + k8s.io/apiserver v0.29.6 + k8s.io/client-go v0.29.6 + k8s.io/code-generator v0.29.6 + k8s.io/component-base v0.29.6 + k8s.io/klog/v2 v2.130.1 + k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 + sigs.k8s.io/controller-runtime v0.17.5 + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 + sigs.k8s.io/yaml v1.4.0 + volcano.sh/apis v1.9.0 ) replace go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.35.1 => go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 @@ -35,56 +41,57 @@ replace go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.35.1 => require ( github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - 
github.com/emicklei/go-restful/v3 v3.11.0 // indirect - github.com/evanphx/json-patch v5.6.0+incompatible // indirect - github.com/evanphx/json-patch/v5 v5.6.0 // indirect - github.com/fsnotify/fsnotify v1.6.0 // indirect - github.com/go-openapi/jsonpointer v0.19.6 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag v0.22.3 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/evanphx/json-patch v5.9.0+incompatible // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.3 // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect - github.com/google/go-cmp v0.5.9 // indirect + github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect - github.com/google/uuid v1.3.1 // indirect - github.com/imdario/mergo v0.3.12 // indirect + github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/gorilla/websocket v1.5.0 // indirect + github.com/imdario/mergo v0.3.16 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.7 // indirect - github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/moby/spdystream v0.2.0 // indirect github.com/modern-go/concurrent 
v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_model v0.4.0 // indirect - github.com/prometheus/common v0.44.0 // indirect - github.com/prometheus/procfs v0.10.1 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.54.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect github.com/spf13/pflag v1.0.5 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect - golang.org/x/mod v0.10.0 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/oauth2 v0.11.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/term v0.16.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.9.3 // indirect + golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect + golang.org/x/mod v0.18.0 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/oauth2 v0.21.0 // indirect + golang.org/x/sync v0.7.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/term v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/time v0.5.0 // indirect + golang.org/x/tools v0.22.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/protobuf v1.32.0 // indirect + google.golang.org/protobuf v1.34.2 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/component-base v0.28.4 // indirect - k8s.io/gengo v0.0.0-20220902162205-c0856e24416d // indirect - k8s.io/klog/v2 v2.100.1 // indirect - k8s.io/kube-openapi 
v0.0.0-20230717233707-2695361300d9 // indirect + k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 // indirect + k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 // indirect + k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect ) diff --git a/ray-operator/go.sum b/ray-operator/go.sum index 646dba1d032..e5f311d24a2 100644 --- a/ray-operator/go.sum +++ b/ray-operator/go.sum @@ -1,416 +1,226 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= -github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= -github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= -github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= -github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= -github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= +github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= +github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks 
v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/dave/dst v0.26.2/go.mod h1:UMDJuIRPfyUCC78eFuB+SV/WI8oDeyFDvM/JR6NI3IU= -github.com/dave/gopackages v0.0.0-20170318123100-46e7023ec56e/go.mod h1:i00+b/gKdIDIxuLDFob7ustLAVqhsZRk2qVZrArELGQ= -github.com/dave/jennifer v1.2.0/go.mod h1:fIb+770HOpJ2fmN9EPPKOqm1vMGhB+TwXKMZhrIygKg= -github.com/dave/kerr v0.0.0-20170318121727-bc25dd6abe8e/go.mod h1:qZqlPyPvfsDJt+3wHJ1EvSXDuVjFTK0j2p/ca+gtsb8= -github.com/dave/rebecca v0.9.1/go.mod h1:N6XYdMD/OKw3lkF3ywh8Z6wPGuwNFDNtWYEMFWEmXBA= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/docopt/docopt-go 
v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= -github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= -github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= -github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= -github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= -github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= -github.com/evanphx/json-patch/v5 v5.6.0 h1:b91NhWfaz02IuVxO9faSllyAtNXHMPkC5J8sJCLunww= -github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= -github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= -github.com/getkin/kin-openapi v0.76.0/go.mod h1:660oXbgy5JFMKreazJaQTw7o+X00qeSyhcnluiMv+Xg= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= 
+github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= +github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= +github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= -github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= -github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo= -github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA= -github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= -github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= -github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE69AqPYEJeo/TWfEeg= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod 
h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= github.com/google/gnostic-models v0.6.8/go.mod 
h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20181127221834-b4f47329b966/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= -github.com/google/uuid v1.1.2/go.mod 
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2cUuW7uA/OeU= -github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= -github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU= -github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= +github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= +github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/jarcoal/httpmock v1.2.0 h1:gSvTxxFR/MEMfsGrvRbdfpRUMBStovlSRLw0Ep1bwwc= github.com/jarcoal/httpmock v1.2.0/go.mod h1:oCoTsnAz4+UoOUIf5lJOWV2QQIW5UoeUI6aM2YnWAZk= -github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod 
h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= -github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/maxatome/go-testdeep v1.11.0 
h1:Tgh5efyCYyJFGUYiT0qxBSIDeXw0F5zSoatlou685kk= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/maxatome/go-testdeep v1.11.0/go.mod h1:011SgQ6efzZYAen6fDn4BqQ+lUR72ysdyKe7Dyogw70= +github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= -github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.6.0/go.mod 
h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= -github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= -github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= -github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM= -github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= -github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= -github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= -github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= -github.com/openshift/api v0.0.0-20211209135129-c58d9f695577 h1:NUe82M8wMYXbd5s+WBAJ2QAZZivs+nhZ3zYgZFwKfqw= -github.com/openshift/api v0.0.0-20211209135129-c58d9f695577/go.mod h1:DoslCwtqUpr3d/gsbq4ZlkaMEdYqKxuypsDjorcHhME= -github.com/openshift/build-machinery-go v0.0.0-20210712174854-1bb7fd1518d3/go.mod h1:b1BuldmJlbA/xYtdZvKi+7j5YGB44qJUJDZ9zwiNCfE= +github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g= +github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= +github.com/openshift/api v0.0.0-20240625084701-0689f006bcde h1:4rhIhSetmZ7hYjws4vcEdNFcZ6P2uVO+ED+ekifsHKI= +github.com/openshift/api v0.0.0-20240625084701-0689f006bcde/go.mod h1:OOh6Qopf21pSzqNVCB5gomomBXb8o5sGKZxG2KNpaXM= github.com/orcaman/concurrent-map/v2 v2.0.1 h1:jOJ5Pg2w1oeB6PeDurIYf6k9PQ+aTITr/6lP/L/zp6c= github.com/orcaman/concurrent-map/v2 v2.0.1/go.mod h1:9Eq3TG2oBe5FirmYWQfYO5iH1q0Jv47PLaNK++uCdOM= -github.com/pkg/errors v0.8.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= -github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= -github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= -github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= -github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= -github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= -github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= -github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.54.0 h1:ZlZy0BgJhTwVZUn7dLOkwCZHUkrAqd3WYtcFCWnM1D8= 
+github.com/prometheus/common v0.54.0/go.mod h1:/TQgMJP5CuVYveyT7n/0Ix8yLNNXy9yRSkhnLTHPDIQ= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark 
v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= -go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= -go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= -go.uber.org/zap v1.25.0 h1:4Hvk6GtkucQ790dqmj7l1eEnRdKm3k3ZUrUMS2d5+5c= -go.uber.org/zap v1.25.0/go.mod h1:JIAUzQIH94IC4fOJQm7gMmBJP5k7wQfdcnYdPoEXJYk= -golang.org/x/arch v0.0.0-20180920145803-b19384d3c130/go.mod h1:cYlCBUl1MsqxdiKgmc4uh7TxZfWSFLOGSRR090WDxt8= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e 
h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= -golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= -golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= +golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 
-golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.11.0 h1:vPL4xzxBM4niKCW6g9whtaWVXTJf1U5e4aZxxFx/gbU= -golang.org/x/oauth2 v0.11.0/go.mod h1:LdF7O/8bLR/qWK9DrpXmbHLTouvRHK0SgJl0GmDBchk= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= +golang.org/x/oauth2 v0.21.0/go.mod 
h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180903190138-2b024373dcd9/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= -golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= 
-golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200509030707-2212a7e161a5/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.6-0.20210820212750-d4cc65f0b2ff/go.mod h1:YD9qOF0M9xpSpdWTBbzEl5e/RnCefISl8E5Noe10jFM= -golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= -golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= 
+golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= +golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20201019141844-1ed22bb0c154/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 
-google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c 
h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= -gopkg.in/src-d/go-billy.v4 v4.3.0/go.mod h1:tm33zBoOwxjYHZIE+OV8bxTWFMJLrconzFMd38aARFk= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -k8s.io/api v0.23.0/go.mod h1:8wmDdLBHBNxtOIytwLstXt5E9PddnZb0GaMcqsvDBpg= -k8s.io/api v0.28.4 h1:8ZBrLjwosLl/NYgv1P7EQLqoO8MGQApnbgH8tu3BMzY= -k8s.io/api v0.28.4/go.mod h1:axWTGrY88s/5YE+JSt4uUi6NMM+gur1en2REMR7IRj0= -k8s.io/apiextensions-apiserver v0.28.4 h1:AZpKY/7wQ8n+ZYDtNHbAJBb+N4AXXJvyZx6ww6yAJvU= -k8s.io/apiextensions-apiserver v0.28.4/go.mod h1:pgQIZ1U8eJSMQcENew/0ShUTlePcSGFq6dxSxf2mwPM= -k8s.io/apimachinery v0.23.0/go.mod h1:fFCTTBKvKcwTPFzjlcxp91uPFZr+JA0FubU4fLzzFYc= -k8s.io/apimachinery v0.28.4 h1:zOSJe1mc+GxuMnFzD4Z/U1wst50X28ZNsn5bhgIIao8= -k8s.io/apimachinery v0.28.4/go.mod h1:wI37ncBvfAoswfq626yPTe6Bz1c22L7uaJ8dho83mgg= -k8s.io/apiserver v0.28.4 h1:BJXlaQbAU/RXYX2lRz+E1oPe3G3TKlozMMCZWu5GMgg= -k8s.io/apiserver v0.28.4/go.mod h1:Idq71oXugKZoVGUUL2wgBCTHbUR+FYTWa4rq9j4n23w= -k8s.io/client-go v0.28.4 h1:Np5ocjlZcTrkyRJ3+T3PkXDpe4UpatQxj85+xjaD2wY= -k8s.io/client-go v0.28.4/go.mod h1:0VDZFpgoZfelyP5Wqu0/r/TRYcLYuJ2U1KEeoaPa1N4= -k8s.io/code-generator v0.23.0/go.mod h1:vQvOhDXhuzqiVfM/YHp+dmg10WDZCchJVObc9MvowsE= -k8s.io/code-generator v0.28.4 h1:tcOSNIZQvuAvXhOwpbuJkKbAABJQeyCcQBCN/3uI18c= -k8s.io/code-generator v0.28.4/go.mod h1:OQAfl6bZikQ/tK6faJ18Vyzo54rUII2NmjurHyiN1g4= -k8s.io/component-base v0.28.4 h1:c/iQLWPdUgI90O+T9TeECg8o7N3YJTiuz2sKxILYcYo= -k8s.io/component-base v0.28.4/go.mod h1:m9hR0uvqXDybiGL2nf/3Lf0MerAfQXzkfWhUY58JUbU= -k8s.io/gengo v0.0.0-20210813121822-485abfe95c7c/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= -k8s.io/gengo v0.0.0-20220902162205-c0856e24416d h1:U9tB195lKdzwqicbJvyJeOXV7Klv+wNAWENRnXEGi08= -k8s.io/gengo v0.0.0-20220902162205-c0856e24416d/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= -k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= +k8s.io/api v0.30.2 h1:+ZhRj+28QT4UOH+BKznu4CBgPWgkXO7XAvMcMl0qKvI= +k8s.io/api v0.30.2/go.mod h1:ULg5g9JvOev2dG0u2hig4Z7tQ2hHIuS+m8MNZ+X6EmI= +k8s.io/apiextensions-apiserver v0.29.6 
h1:tUu1N6Zt9GT8KVcPF5aGDqfISz1mveM4yFh7eL5bxmE= +k8s.io/apiextensions-apiserver v0.29.6/go.mod h1:iw1EbwZat08I219qrQKoFMHGo7J9KxPqMpVKxCbNbCs= +k8s.io/apimachinery v0.30.2 h1:fEMcnBj6qkzzPGSVsAZtQThU62SmQ4ZymlXRC5yFSCg= +k8s.io/apimachinery v0.30.2/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/apiserver v0.29.6 h1:JxgDbpgahOgqoDOf+zVl2mI+rQcHcLQnK6YhhtsjbNs= +k8s.io/apiserver v0.29.6/go.mod h1:HrQwfPWxhwEa+n8/+5YwSF5yT2WXbeyFjqq6KEXHTX8= +k8s.io/client-go v0.29.6 h1:5E2ebuB/p0F0THuQatyvhDvPL2SIeqwTPrtnrwKob/8= +k8s.io/client-go v0.29.6/go.mod h1:jHZcrQqDplyv20v7eu+iFM4gTpglZSZoMVcKrh8sRGg= +k8s.io/code-generator v0.29.6 h1:Z8T9VMR0mr7V5GG66c6GVAZrIiEy2uFoQwbeVeWLqPA= +k8s.io/code-generator v0.29.6/go.mod h1:7TYnI0dYItL2cKuhhgPSuF3WED9uMdELgbVXFfn/joE= +k8s.io/component-base v0.29.6 h1:XkVJI67FvBgNb/3kKqvaGKokxUrIR0RrksCPNI+JYCs= +k8s.io/component-base v0.29.6/go.mod h1:kIahZm8aw9lV8Vw17LF89REmeBrv5+QEl3v7HsrmITY= +k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 h1:pWEwq4Asjm4vjW7vcsmijwBhOr1/shsbSYiWXmNGlks= +k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= +k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 h1:NGrVE502P0s0/1hudf8zjgwki1X/TByhmAoILTarmzo= +k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70/go.mod h1:VH3AT8AaQOqiGjMF9p0/IM1Dj+82ZwjfxUP1IxaHE+8= k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= -k8s.io/klog/v2 v2.30.0/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= -k8s.io/klog/v2 v2.100.1 h1:7WCHKK6K8fNhTqfBhISHQ97KrnJNFZMcQvKp7gP/tmg= -k8s.io/klog/v2 v2.100.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= -k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65/go.mod h1:sX9MT8g7NVZM5lVL/j8QyCCJe8YSMW30QvGZWaCIDIk= -k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 h1:LyMgNKD2P8Wn1iAwQU5OhxCKlKJy0sHc+PcDwFB24dQ= -k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9/go.mod h1:wZK2AVp1uHCp4VamDVgBP2COHZjqD1T68Rf0CM3YjSM= 
-k8s.io/utils v0.0.0-20210802155522-efc7438f0176/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= -k8s.io/utils v0.0.0-20210930125809-cb0fa318a74b/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= -k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 h1:qY1Ad8PODbnymg2pRbkyMT/ylpTrCM8P2RJ0yroCyIk= -k8s.io/utils v0.0.0-20230406110748-d93618cff8a2/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -sigs.k8s.io/controller-runtime v0.16.3 h1:2TuvuokmfXvDUamSx1SuAOO3eTyye+47mJCigwG62c4= -sigs.k8s.io/controller-runtime v0.16.3/go.mod h1:j7bialYoSn142nv9sCOJmQgDXQXxnroFU4VnX/brVJ0= -sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6/go.mod h1:p4QtZmO4uMYipTQNzagwnNoseA6OxSUutVw05NhYDRs= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b h1:Q9xmGWBvOGd8UJyccgpYlLosk/JlfP3xQLNkQlHJeXw= +k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b/go.mod h1:UxDHUPsUwTOOxSU+oXURfFBcAS6JwiRXTYqYwfuGowc= +k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak= +k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.17.5 h1:1FI9Lm7NiOOmBsgTV36/s2XrEFXnO2C4sbg/Zme72Rw= +sigs.k8s.io/controller-runtime v0.17.5/go.mod h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/structured-merge-diff/v4 v4.0.2/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= -sigs.k8s.io/structured-merge-diff/v4 v4.1.2/go.mod h1:j/nl6xW8vLS49O8YvXW1ocPhZawJtm+Yrr7PPRQ0Vg4= -sigs.k8s.io/structured-merge-diff/v4 v4.2.3 h1:PRbqxJClWWYMNV1dhaG4NsibJbArud9kFxnAMREiWFE= -sigs.k8s.io/structured-merge-diff/v4 
v4.2.3/go.mod h1:qjx8mGObPmV2aSZepjQjbmb2ihdVs8cGKBraizNC69E= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= -sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= -sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= -volcano.sh/apis v1.6.0-alpha.0.0.20221012070524-685db38b4fae h1:H7yidKnIq/Y7KmjFP5xFSmE7xL674226D8pEoA/RfG8= -volcano.sh/apis v1.6.0-alpha.0.0.20221012070524-685db38b4fae/go.mod h1:drNMGuHPn1ew7oBSDQb5KRey6tXOQksbUtw3gPxF3Vo= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +volcano.sh/apis v1.9.0 h1:e+9yEbQOi6HvgaayAxYULT6n+59mkYvmqjKhp9Z06sY= +volcano.sh/apis v1.9.0/go.mod h1:yXNfsZRzAOq6EUyPJYFrlMorh1XsYQGonGWyr4IiznM= diff --git a/ray-operator/main.go b/ray-operator/main.go index dc75f2a14c9..0446ec49e67 100644 --- a/ray-operator/main.go +++ b/ray-operator/main.go @@ -19,7 +19,9 @@ import ( "k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/apimachinery/pkg/selection" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + utilfeature "k8s.io/apiserver/pkg/util/feature" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" @@ -31,8 +33,8 @@ import ( configapi "github.com/ray-project/kuberay/ray-operator/apis/config/v1alpha1" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/ray-project/kuberay/ray-operator/controllers/ray" - "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + "github.com/ray-project/kuberay/ray-operator/pkg/features" // +kubebuilder:scaffold:imports 
) @@ -48,7 +50,6 @@ func init() { utilruntime.Must(routev1.Install(scheme)) utilruntime.Must(batchv1.AddToScheme(scheme)) utilruntime.Must(configapi.AddToScheme(scheme)) - batchscheduler.AddToScheme(scheme) // +kubebuilder:scaffold:scheme } @@ -59,10 +60,15 @@ func main() { var probeAddr string var reconcileConcurrency int var watchNamespace string + var forcedClusterUpgrade bool var logFile string var logFileEncoder string var logStdoutEncoder string + var useKubernetesProxy bool var configFile string + var featureGates string + var enableBatchScheduler bool + var batchScheduler string // TODO: remove flag-based config once Configuration API graduates to v1. flag.StringVar(&metricsAddr, "metrics-addr", configapi.DefaultMetricsAddr, "The address the metric endpoint binds to.") @@ -77,17 +83,22 @@ func main() { "watch-namespace", "", "Specify a list of namespaces to watch for custom resources, separated by commas. If left empty, all namespaces will be watched.") - flag.BoolVar(&ray.ForcedClusterUpgrade, "forced-cluster-upgrade", false, - "Forced cluster upgrade flag") + flag.BoolVar(&forcedClusterUpgrade, "forced-cluster-upgrade", false, + "(Deprecated) Forced cluster upgrade flag") flag.StringVar(&logFile, "log-file-path", "", "Synchronize logs to local file") flag.StringVar(&logFileEncoder, "log-file-encoder", "json", "Encoder to use for log file. Valid values are 'json' and 'console'. Defaults to 'json'") flag.StringVar(&logStdoutEncoder, "log-stdout-encoder", "json", "Encoder to use for logging stdout. Valid values are 'json' and 'console'. Defaults to 'json'") - flag.BoolVar(&ray.EnableBatchScheduler, "enable-batch-scheduler", false, - "Enable batch scheduler. Currently is volcano, which supports gang scheduler policy.") + flag.BoolVar(&enableBatchScheduler, "enable-batch-scheduler", false, + "(Deprecated) Enable batch scheduler. Currently is volcano, which supports gang scheduler policy. 
Please use --batch-scheduler instead.") + flag.StringVar(&batchScheduler, "batch-scheduler", "", + "Batch scheduler name, supported values are volcano and yunikorn.") flag.StringVar(&configFile, "config", "", "Path to structured config file. Flags are ignored if config file is set.") + flag.BoolVar(&useKubernetesProxy, "use-kubernetes-proxy", false, + "Use Kubernetes proxy subresource when connecting to the Ray Head node.") + flag.StringVar(&featureGates, "feature-gates", "", "A set of key=value pairs that describe feature gates. E.g. FeatureOne=true,FeatureTwo=false,...") opts := k8szap.Options{ TimeEncoder: zapcore.ISO8601TimeEncoder, @@ -103,10 +114,6 @@ func main() { config, err = decodeConfig(configData, scheme) exitOnError(err, "failed to decode config file") - - // TODO: remove globally-scoped variables - ray.ForcedClusterUpgrade = config.ForcedClusterUpgrade - ray.EnableBatchScheduler = config.EnableBatchScheduler } else { config.MetricsAddr = metricsAddr config.ProbeAddr = probeAddr @@ -114,11 +121,13 @@ func main() { config.LeaderElectionNamespace = leaderElectionNamespace config.ReconcileConcurrency = reconcileConcurrency config.WatchNamespace = watchNamespace - config.ForcedClusterUpgrade = ray.ForcedClusterUpgrade config.LogFile = logFile config.LogFileEncoder = logFileEncoder config.LogStdoutEncoder = logStdoutEncoder - config.EnableBatchScheduler = ray.EnableBatchScheduler + config.EnableBatchScheduler = enableBatchScheduler + config.BatchScheduler = batchScheduler + config.UseKubernetesProxy = useKubernetesProxy + config.DeleteRayJobAfterJobFinishes = os.Getenv(utils.DELETE_RAYJOB_CR_AFTER_JOB_FINISHES) == "true" } stdoutEncoder, err := newLogEncoder(logStdoutEncoder) @@ -145,16 +154,33 @@ func main() { combineLoggerR := zapr.NewLogger(combineLogger) ctrl.SetLogger(combineLoggerR) + + // By default, the log from kubernetes/client-go is not json format. + // This will apply the logger to kubernetes/client-go and change it to json format. 
+ klog.SetLogger(combineLoggerR) } else { - ctrl.SetLogger(k8szap.New(k8szap.UseFlagOptions(&opts))) + k8sLogger := k8szap.New(k8szap.UseFlagOptions(&opts)) + ctrl.SetLogger(k8sLogger) + + // By default, the log from kubernetes/client-go is not json format. + // This will apply the logger to kubernetes/client-go and change it to json format. + klog.SetLogger(k8sLogger) } - if ray.ForcedClusterUpgrade { - setupLog.Info("Feature flag forced-cluster-upgrade is enabled.") + if forcedClusterUpgrade { + setupLog.Info("Deprecated feature flag forced-cluster-upgrade is enabled, which has no effect.") } - if ray.EnableBatchScheduler { - setupLog.Info("Feature flag enable-batch-scheduler is enabled.") + + // validate the batch scheduler configs, + // exit with error if the configs is invalid. + if err := configapi.ValidateBatchSchedulerConfig(setupLog, config); err != nil { + exitOnError(err, "batch scheduler configs validation failed") + } + + if err := utilfeature.DefaultMutableFeatureGate.Set(featureGates); err != nil { + exitOnError(err, "Unable to set flag gates for known features") } + features.LogFeatureGates(setupLog) // Manager options options := ctrl.Options{ @@ -186,11 +212,11 @@ func main() { if watchNamespaces[0] == "" { setupLog.Info("Flag watchNamespace is not set. 
Watch custom resources in all namespaces.") } else { - setupLog.Info(fmt.Sprintf("Only watch custom resources in the namespace: %s", watchNamespaces[0])) + setupLog.Info("Only watch custom resources in the namespace.", "namespace", watchNamespaces[0]) options.Cache.DefaultNamespaces[watchNamespaces[0]] = cache.Config{} } } else { - setupLog.Info(fmt.Sprintf("Only watch custom resources in multiple namespaces: %v", watchNamespaces)) + setupLog.Info("Only watch custom resources in multiple namespaces.", "namespaces", watchNamespaces) for _, namespace := range watchNamespaces { options.Cache.DefaultNamespaces[namespace] = cache.Config{} } @@ -207,11 +233,11 @@ func main() { WorkerSidecarContainers: config.WorkerSidecarContainers, } ctx := ctrl.SetupSignalHandler() - exitOnError(ray.NewReconciler(ctx, mgr, rayClusterOptions).SetupWithManager(mgr, config.ReconcileConcurrency), + exitOnError(ray.NewReconciler(ctx, mgr, rayClusterOptions, config).SetupWithManager(mgr, config.ReconcileConcurrency), "unable to create controller", "controller", "RayCluster") - exitOnError(ray.NewRayServiceReconciler(ctx, mgr, utils.GetRayDashboardClient, utils.GetRayHttpProxyClient).SetupWithManager(mgr), + exitOnError(ray.NewRayServiceReconciler(ctx, mgr, config).SetupWithManager(mgr, config.ReconcileConcurrency), "unable to create controller", "controller", "RayService") - exitOnError(ray.NewRayJobReconciler(ctx, mgr, utils.GetRayDashboardClient).SetupWithManager(mgr), + exitOnError(ray.NewRayJobReconciler(ctx, mgr, config).SetupWithManager(mgr, config.ReconcileConcurrency), "unable to create controller", "controller", "RayJob") if os.Getenv("ENABLE_WEBHOOKS") == "true" { diff --git a/ray-operator/main_test.go b/ray-operator/main_test.go index 85e02e1b205..211a87e925c 100644 --- a/ray-operator/main_test.go +++ b/ray-operator/main_test.go @@ -7,7 +7,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" configapi 
"github.com/ray-project/kuberay/ray-operator/apis/config/v1alpha1" ) @@ -16,9 +16,9 @@ func Test_decodeConfig(t *testing.T) { testcases := []struct { name string configData string + errContains string expectedConfig configapi.Configuration expectErr bool - errContains string }{ { name: "default config file", @@ -32,7 +32,7 @@ kind: Configuration }, MetricsAddr: ":8080", ProbeAddr: ":8082", - EnableLeaderElection: pointer.Bool(true), + EnableLeaderElection: ptr.To(true), ReconcileConcurrency: 1, }, expectErr: false, @@ -53,7 +53,7 @@ reconcileConcurrency: 1 }, MetricsAddr: ":8080", ProbeAddr: ":8082", - EnableLeaderElection: pointer.Bool(true), + EnableLeaderElection: ptr.To(true), ReconcileConcurrency: 1, }, expectErr: false, @@ -80,7 +80,7 @@ workerSidecarContainers: }, MetricsAddr: ":8080", ProbeAddr: ":8082", - EnableLeaderElection: pointer.Bool(true), + EnableLeaderElection: ptr.To(true), ReconcileConcurrency: 1, HeadSidecarContainers: []corev1.Container{ { @@ -114,7 +114,7 @@ unknownfield: 1 }, MetricsAddr: ":8080", ProbeAddr: ":8082", - EnableLeaderElection: pointer.Bool(true), + EnableLeaderElection: ptr.To(true), ReconcileConcurrency: 1, }, expectErr: false, @@ -135,7 +135,7 @@ reconcileConcurrency: true }, MetricsAddr: ":8080", ProbeAddr: ":8082", - EnableLeaderElection: pointer.Bool(true), + EnableLeaderElection: ptr.To(true), ReconcileConcurrency: 0, }, expectErr: true, @@ -153,7 +153,7 @@ reconcileConcurrency: true expectedConfig: configapi.Configuration{ MetricsAddr: ":8080", ProbeAddr: ":8082", - EnableLeaderElection: pointer.Bool(true), + EnableLeaderElection: ptr.To(true), }, expectErr: true, errContains: `no kind "Configuration" is registered for version "config.ray.io/v1beta1" in scheme`, diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/appstatus.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/appstatus.go index 05b31793678..7e61c0ed93b 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/appstatus.go +++ 
b/ray-operator/pkg/client/applyconfiguration/ray/v1/appstatus.go @@ -9,10 +9,10 @@ import ( // AppStatusApplyConfiguration represents an declarative configuration of the AppStatus type for use // with apply. type AppStatusApplyConfiguration struct { - Status *string `json:"status,omitempty"` - Message *string `json:"message,omitempty"` HealthLastUpdateTime *v1.Time `json:"healthLastUpdateTime,omitempty"` Deployments map[string]ServeDeploymentStatusApplyConfiguration `json:"serveDeploymentStatuses,omitempty"` + Status *string `json:"status,omitempty"` + Message *string `json:"message,omitempty"` } // AppStatusApplyConfiguration constructs an declarative configuration of the AppStatus type for use with @@ -21,22 +21,6 @@ func AppStatus() *AppStatusApplyConfiguration { return &AppStatusApplyConfiguration{} } -// WithStatus sets the Status field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Status field is set to the value of the last call. -func (b *AppStatusApplyConfiguration) WithStatus(value string) *AppStatusApplyConfiguration { - b.Status = &value - return b -} - -// WithMessage sets the Message field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Message field is set to the value of the last call. -func (b *AppStatusApplyConfiguration) WithMessage(value string) *AppStatusApplyConfiguration { - b.Message = &value - return b -} - // WithHealthLastUpdateTime sets the HealthLastUpdateTime field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the HealthLastUpdateTime field is set to the value of the last call. 
@@ -58,3 +42,19 @@ func (b *AppStatusApplyConfiguration) WithDeployments(entries map[string]ServeDe } return b } + +// WithStatus sets the Status field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Status field is set to the value of the last call. +func (b *AppStatusApplyConfiguration) WithStatus(value string) *AppStatusApplyConfiguration { + b.Status = &value + return b +} + +// WithMessage sets the Message field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Message field is set to the value of the last call. +func (b *AppStatusApplyConfiguration) WithMessage(value string) *AppStatusApplyConfiguration { + b.Message = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/autoscaleroptions.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/autoscaleroptions.go index 1a14e46d13f..192df732a7a 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/autoscaleroptions.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/autoscaleroptions.go @@ -13,12 +13,12 @@ type AutoscalerOptionsApplyConfiguration struct { Resources *v1.ResourceRequirements `json:"resources,omitempty"` Image *string `json:"image,omitempty"` ImagePullPolicy *v1.PullPolicy `json:"imagePullPolicy,omitempty"` - Env []v1.EnvVar `json:"env,omitempty"` - EnvFrom []v1.EnvFromSource `json:"envFrom,omitempty"` - VolumeMounts []v1.VolumeMount `json:"volumeMounts,omitempty"` SecurityContext *v1.SecurityContext `json:"securityContext,omitempty"` IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"` UpscalingMode *rayv1.UpscalingMode `json:"upscalingMode,omitempty"` + Env []v1.EnvVar `json:"env,omitempty"` + EnvFrom []v1.EnvFromSource `json:"envFrom,omitempty"` + VolumeMounts 
[]v1.VolumeMount `json:"volumeMounts,omitempty"` } // AutoscalerOptionsApplyConfiguration constructs an declarative configuration of the AutoscalerOptions type for use with @@ -51,6 +51,30 @@ func (b *AutoscalerOptionsApplyConfiguration) WithImagePullPolicy(value v1.PullP return b } +// WithSecurityContext sets the SecurityContext field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the SecurityContext field is set to the value of the last call. +func (b *AutoscalerOptionsApplyConfiguration) WithSecurityContext(value v1.SecurityContext) *AutoscalerOptionsApplyConfiguration { + b.SecurityContext = &value + return b +} + +// WithIdleTimeoutSeconds sets the IdleTimeoutSeconds field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the IdleTimeoutSeconds field is set to the value of the last call. +func (b *AutoscalerOptionsApplyConfiguration) WithIdleTimeoutSeconds(value int32) *AutoscalerOptionsApplyConfiguration { + b.IdleTimeoutSeconds = &value + return b +} + +// WithUpscalingMode sets the UpscalingMode field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the UpscalingMode field is set to the value of the last call. +func (b *AutoscalerOptionsApplyConfiguration) WithUpscalingMode(value rayv1.UpscalingMode) *AutoscalerOptionsApplyConfiguration { + b.UpscalingMode = &value + return b +} + // WithEnv adds the given value to the Env field in the declarative configuration // and returns the receiver, so that objects can be build by chaining "With" function invocations. // If called multiple times, values provided by each call will be appended to the Env field. 
@@ -80,27 +104,3 @@ func (b *AutoscalerOptionsApplyConfiguration) WithVolumeMounts(values ...v1.Volu } return b } - -// WithSecurityContext sets the SecurityContext field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the SecurityContext field is set to the value of the last call. -func (b *AutoscalerOptionsApplyConfiguration) WithSecurityContext(value v1.SecurityContext) *AutoscalerOptionsApplyConfiguration { - b.SecurityContext = &value - return b -} - -// WithIdleTimeoutSeconds sets the IdleTimeoutSeconds field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the IdleTimeoutSeconds field is set to the value of the last call. -func (b *AutoscalerOptionsApplyConfiguration) WithIdleTimeoutSeconds(value int32) *AutoscalerOptionsApplyConfiguration { - b.IdleTimeoutSeconds = &value - return b -} - -// WithUpscalingMode sets the UpscalingMode field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UpscalingMode field is set to the value of the last call. -func (b *AutoscalerOptionsApplyConfiguration) WithUpscalingMode(value rayv1.UpscalingMode) *AutoscalerOptionsApplyConfiguration { - b.UpscalingMode = &value - return b -} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/dashboardstatus.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/dashboardstatus.go deleted file mode 100644 index c7d3bb438cd..00000000000 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/dashboardstatus.go +++ /dev/null @@ -1,36 +0,0 @@ -// Code generated by applyconfiguration-gen. DO NOT EDIT. 
- -package v1 - -import ( - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// DashboardStatusApplyConfiguration represents an declarative configuration of the DashboardStatus type for use -// with apply. -type DashboardStatusApplyConfiguration struct { - IsHealthy *bool `json:"isHealthy,omitempty"` - HealthLastUpdateTime *v1.Time `json:"healthLastUpdateTime,omitempty"` -} - -// DashboardStatusApplyConfiguration constructs an declarative configuration of the DashboardStatus type for use with -// apply. -func DashboardStatus() *DashboardStatusApplyConfiguration { - return &DashboardStatusApplyConfiguration{} -} - -// WithIsHealthy sets the IsHealthy field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the IsHealthy field is set to the value of the last call. -func (b *DashboardStatusApplyConfiguration) WithIsHealthy(value bool) *DashboardStatusApplyConfiguration { - b.IsHealthy = &value - return b -} - -// WithHealthLastUpdateTime sets the HealthLastUpdateTime field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the HealthLastUpdateTime field is set to the value of the last call. -func (b *DashboardStatusApplyConfiguration) WithHealthLastUpdateTime(value v1.Time) *DashboardStatusApplyConfiguration { - b.HealthLastUpdateTime = &value - return b -} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/gcsfaulttoleranceoptions.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/gcsfaulttoleranceoptions.go new file mode 100644 index 00000000000..85a51db7cc0 --- /dev/null +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/gcsfaulttoleranceoptions.go @@ -0,0 +1,50 @@ +// Code generated by applyconfiguration-gen. DO NOT EDIT. 
+ +package v1 + +// GcsFaultToleranceOptionsApplyConfiguration represents an declarative configuration of the GcsFaultToleranceOptions type for use +// with apply. +type GcsFaultToleranceOptionsApplyConfiguration struct { + RedisUsername *RedisCredentialApplyConfiguration `json:"redisUsername,omitempty"` + RedisPassword *RedisCredentialApplyConfiguration `json:"redisPassword,omitempty"` + ExternalStorageNamespace *string `json:"externalStorageNamespace,omitempty"` + RedisAddress *string `json:"redisAddress,omitempty"` +} + +// GcsFaultToleranceOptionsApplyConfiguration constructs an declarative configuration of the GcsFaultToleranceOptions type for use with +// apply. +func GcsFaultToleranceOptions() *GcsFaultToleranceOptionsApplyConfiguration { + return &GcsFaultToleranceOptionsApplyConfiguration{} +} + +// WithRedisUsername sets the RedisUsername field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the RedisUsername field is set to the value of the last call. +func (b *GcsFaultToleranceOptionsApplyConfiguration) WithRedisUsername(value *RedisCredentialApplyConfiguration) *GcsFaultToleranceOptionsApplyConfiguration { + b.RedisUsername = value + return b +} + +// WithRedisPassword sets the RedisPassword field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the RedisPassword field is set to the value of the last call. 
+func (b *GcsFaultToleranceOptionsApplyConfiguration) WithRedisPassword(value *RedisCredentialApplyConfiguration) *GcsFaultToleranceOptionsApplyConfiguration { + b.RedisPassword = value + return b +} + +// WithExternalStorageNamespace sets the ExternalStorageNamespace field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ExternalStorageNamespace field is set to the value of the last call. +func (b *GcsFaultToleranceOptionsApplyConfiguration) WithExternalStorageNamespace(value string) *GcsFaultToleranceOptionsApplyConfiguration { + b.ExternalStorageNamespace = &value + return b +} + +// WithRedisAddress sets the RedisAddress field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the RedisAddress field is set to the value of the last call. +func (b *GcsFaultToleranceOptionsApplyConfiguration) WithRedisAddress(value string) *GcsFaultToleranceOptionsApplyConfiguration { + b.RedisAddress = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/headinfo.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/headinfo.go index 73e6964f428..703d1e889a7 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/headinfo.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/headinfo.go @@ -5,8 +5,10 @@ package v1 // HeadInfoApplyConfiguration represents an declarative configuration of the HeadInfo type for use // with apply. 
type HeadInfoApplyConfiguration struct { - PodIP *string `json:"podIP,omitempty"` - ServiceIP *string `json:"serviceIP,omitempty"` + PodIP *string `json:"podIP,omitempty"` + ServiceIP *string `json:"serviceIP,omitempty"` + PodName *string `json:"podName,omitempty"` + ServiceName *string `json:"serviceName,omitempty"` } // HeadInfoApplyConfiguration constructs an declarative configuration of the HeadInfo type for use with @@ -30,3 +32,19 @@ func (b *HeadInfoApplyConfiguration) WithServiceIP(value string) *HeadInfoApplyC b.ServiceIP = &value return b } + +// WithPodName sets the PodName field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the PodName field is set to the value of the last call. +func (b *HeadInfoApplyConfiguration) WithPodName(value string) *HeadInfoApplyConfiguration { + b.PodName = &value + return b +} + +// WithServiceName sets the ServiceName field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ServiceName field is set to the value of the last call. +func (b *HeadInfoApplyConfiguration) WithServiceName(value string) *HeadInfoApplyConfiguration { + b.ServiceName = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclustercondition.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclustercondition.go new file mode 100644 index 00000000000..504c4663337 --- /dev/null +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclustercondition.go @@ -0,0 +1,74 @@ +// Code generated by applyconfiguration-gen. DO NOT EDIT. 
+ +package v1 + +import ( + v1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// RayClusterConditionApplyConfiguration represents an declarative configuration of the RayClusterCondition type for use +// with apply. +type RayClusterConditionApplyConfiguration struct { + Type *v1.RayClusterConditionType `json:"type,omitempty"` + Status *corev1.ConditionStatus `json:"status,omitempty"` + LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` + LastTransitionTime *metav1.Time `json:"lastTransitionTime,omitempty"` + Reason *string `json:"reason,omitempty"` + Message *string `json:"message,omitempty"` +} + +// RayClusterConditionApplyConfiguration constructs an declarative configuration of the RayClusterCondition type for use with +// apply. +func RayClusterCondition() *RayClusterConditionApplyConfiguration { + return &RayClusterConditionApplyConfiguration{} +} + +// WithType sets the Type field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Type field is set to the value of the last call. +func (b *RayClusterConditionApplyConfiguration) WithType(value v1.RayClusterConditionType) *RayClusterConditionApplyConfiguration { + b.Type = &value + return b +} + +// WithStatus sets the Status field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Status field is set to the value of the last call. 
+func (b *RayClusterConditionApplyConfiguration) WithStatus(value corev1.ConditionStatus) *RayClusterConditionApplyConfiguration { + b.Status = &value + return b +} + +// WithLastUpdateTime sets the LastUpdateTime field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the LastUpdateTime field is set to the value of the last call. +func (b *RayClusterConditionApplyConfiguration) WithLastUpdateTime(value metav1.Time) *RayClusterConditionApplyConfiguration { + b.LastUpdateTime = &value + return b +} + +// WithLastTransitionTime sets the LastTransitionTime field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the LastTransitionTime field is set to the value of the last call. +func (b *RayClusterConditionApplyConfiguration) WithLastTransitionTime(value metav1.Time) *RayClusterConditionApplyConfiguration { + b.LastTransitionTime = &value + return b +} + +// WithReason sets the Reason field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Reason field is set to the value of the last call. +func (b *RayClusterConditionApplyConfiguration) WithReason(value string) *RayClusterConditionApplyConfiguration { + b.Reason = &value + return b +} + +// WithMessage sets the Message field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Message field is set to the value of the last call. 
+func (b *RayClusterConditionApplyConfiguration) WithMessage(value string) *RayClusterConditionApplyConfiguration { + b.Message = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclusterspec.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclusterspec.go index 22cb74811d9..8f3e840799d 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclusterspec.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclusterspec.go @@ -5,13 +5,15 @@ package v1 // RayClusterSpecApplyConfiguration represents an declarative configuration of the RayClusterSpec type for use // with apply. type RayClusterSpecApplyConfiguration struct { - HeadGroupSpec *HeadGroupSpecApplyConfiguration `json:"headGroupSpec,omitempty"` - WorkerGroupSpecs []WorkerGroupSpecApplyConfiguration `json:"workerGroupSpecs,omitempty"` - RayVersion *string `json:"rayVersion,omitempty"` - EnableInTreeAutoscaling *bool `json:"enableInTreeAutoscaling,omitempty"` - AutoscalerOptions *AutoscalerOptionsApplyConfiguration `json:"autoscalerOptions,omitempty"` - HeadServiceAnnotations map[string]string `json:"headServiceAnnotations,omitempty"` - Suspend *bool `json:"suspend,omitempty"` + Suspend *bool `json:"suspend,omitempty"` + ManagedBy *string `json:"managedBy,omitempty"` + AutoscalerOptions *AutoscalerOptionsApplyConfiguration `json:"autoscalerOptions,omitempty"` + HeadServiceAnnotations map[string]string `json:"headServiceAnnotations,omitempty"` + EnableInTreeAutoscaling *bool `json:"enableInTreeAutoscaling,omitempty"` + GcsFaultToleranceOptions *GcsFaultToleranceOptionsApplyConfiguration `json:"gcsFaultToleranceOptions,omitempty"` + HeadGroupSpec *HeadGroupSpecApplyConfiguration `json:"headGroupSpec,omitempty"` + RayVersion *string `json:"rayVersion,omitempty"` + WorkerGroupSpecs []WorkerGroupSpecApplyConfiguration `json:"workerGroupSpecs,omitempty"` } // RayClusterSpecApplyConfiguration constructs an declarative configuration of the RayClusterSpec 
type for use with @@ -20,40 +22,19 @@ func RayClusterSpec() *RayClusterSpecApplyConfiguration { return &RayClusterSpecApplyConfiguration{} } -// WithHeadGroupSpec sets the HeadGroupSpec field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the HeadGroupSpec field is set to the value of the last call. -func (b *RayClusterSpecApplyConfiguration) WithHeadGroupSpec(value *HeadGroupSpecApplyConfiguration) *RayClusterSpecApplyConfiguration { - b.HeadGroupSpec = value - return b -} - -// WithWorkerGroupSpecs adds the given value to the WorkerGroupSpecs field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, values provided by each call will be appended to the WorkerGroupSpecs field. -func (b *RayClusterSpecApplyConfiguration) WithWorkerGroupSpecs(values ...*WorkerGroupSpecApplyConfiguration) *RayClusterSpecApplyConfiguration { - for i := range values { - if values[i] == nil { - panic("nil value passed to WithWorkerGroupSpecs") - } - b.WorkerGroupSpecs = append(b.WorkerGroupSpecs, *values[i]) - } - return b -} - -// WithRayVersion sets the RayVersion field in the declarative configuration to the given value +// WithSuspend sets the Suspend field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RayVersion field is set to the value of the last call. -func (b *RayClusterSpecApplyConfiguration) WithRayVersion(value string) *RayClusterSpecApplyConfiguration { - b.RayVersion = &value +// If called multiple times, the Suspend field is set to the value of the last call. 
+func (b *RayClusterSpecApplyConfiguration) WithSuspend(value bool) *RayClusterSpecApplyConfiguration { + b.Suspend = &value return b } -// WithEnableInTreeAutoscaling sets the EnableInTreeAutoscaling field in the declarative configuration to the given value +// WithManagedBy sets the ManagedBy field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the EnableInTreeAutoscaling field is set to the value of the last call. -func (b *RayClusterSpecApplyConfiguration) WithEnableInTreeAutoscaling(value bool) *RayClusterSpecApplyConfiguration { - b.EnableInTreeAutoscaling = &value +// If called multiple times, the ManagedBy field is set to the value of the last call. +func (b *RayClusterSpecApplyConfiguration) WithManagedBy(value string) *RayClusterSpecApplyConfiguration { + b.ManagedBy = &value return b } @@ -79,10 +60,47 @@ func (b *RayClusterSpecApplyConfiguration) WithHeadServiceAnnotations(entries ma return b } -// WithSuspend sets the Suspend field in the declarative configuration to the given value +// WithEnableInTreeAutoscaling sets the EnableInTreeAutoscaling field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Suspend field is set to the value of the last call. -func (b *RayClusterSpecApplyConfiguration) WithSuspend(value bool) *RayClusterSpecApplyConfiguration { - b.Suspend = &value +// If called multiple times, the EnableInTreeAutoscaling field is set to the value of the last call. 
+func (b *RayClusterSpecApplyConfiguration) WithEnableInTreeAutoscaling(value bool) *RayClusterSpecApplyConfiguration { + b.EnableInTreeAutoscaling = &value + return b +} + +// WithGcsFaultToleranceOptions sets the GcsFaultToleranceOptions field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the GcsFaultToleranceOptions field is set to the value of the last call. +func (b *RayClusterSpecApplyConfiguration) WithGcsFaultToleranceOptions(value *GcsFaultToleranceOptionsApplyConfiguration) *RayClusterSpecApplyConfiguration { + b.GcsFaultToleranceOptions = value + return b +} + +// WithHeadGroupSpec sets the HeadGroupSpec field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the HeadGroupSpec field is set to the value of the last call. +func (b *RayClusterSpecApplyConfiguration) WithHeadGroupSpec(value *HeadGroupSpecApplyConfiguration) *RayClusterSpecApplyConfiguration { + b.HeadGroupSpec = value + return b +} + +// WithRayVersion sets the RayVersion field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the RayVersion field is set to the value of the last call. +func (b *RayClusterSpecApplyConfiguration) WithRayVersion(value string) *RayClusterSpecApplyConfiguration { + b.RayVersion = &value + return b +} + +// WithWorkerGroupSpecs adds the given value to the WorkerGroupSpecs field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the WorkerGroupSpecs field. 
+func (b *RayClusterSpecApplyConfiguration) WithWorkerGroupSpecs(values ...*WorkerGroupSpecApplyConfiguration) *RayClusterSpecApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithWorkerGroupSpecs") + } + b.WorkerGroupSpecs = append(b.WorkerGroupSpecs, *values[i]) + } return b } diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclusterstatus.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclusterstatus.go index 101e2ae84c7..96eeccdcf14 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclusterstatus.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayclusterstatus.go @@ -11,20 +11,23 @@ import ( // RayClusterStatusApplyConfiguration represents an declarative configuration of the RayClusterStatus type for use // with apply. type RayClusterStatusApplyConfiguration struct { - State *v1.ClusterState `json:"state,omitempty"` - AvailableWorkerReplicas *int32 `json:"availableWorkerReplicas,omitempty"` - DesiredWorkerReplicas *int32 `json:"desiredWorkerReplicas,omitempty"` - MinWorkerReplicas *int32 `json:"minWorkerReplicas,omitempty"` - MaxWorkerReplicas *int32 `json:"maxWorkerReplicas,omitempty"` - DesiredCPU *resource.Quantity `json:"desiredCPU,omitempty"` - DesiredMemory *resource.Quantity `json:"desiredMemory,omitempty"` - DesiredGPU *resource.Quantity `json:"desiredGPU,omitempty"` - DesiredTPU *resource.Quantity `json:"desiredTPU,omitempty"` - LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` - Endpoints map[string]string `json:"endpoints,omitempty"` - Head *HeadInfoApplyConfiguration `json:"head,omitempty"` - Reason *string `json:"reason,omitempty"` - ObservedGeneration *int64 `json:"observedGeneration,omitempty"` + State *v1.ClusterState `json:"state,omitempty"` + DesiredCPU *resource.Quantity `json:"desiredCPU,omitempty"` + DesiredMemory *resource.Quantity `json:"desiredMemory,omitempty"` + DesiredGPU *resource.Quantity `json:"desiredGPU,omitempty"` + 
DesiredTPU *resource.Quantity `json:"desiredTPU,omitempty"` + LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` + StateTransitionTimes map[v1.ClusterState]*metav1.Time `json:"stateTransitionTimes,omitempty"` + Endpoints map[string]string `json:"endpoints,omitempty"` + Head *HeadInfoApplyConfiguration `json:"head,omitempty"` + Reason *string `json:"reason,omitempty"` + Conditions []metav1.Condition `json:"conditions,omitempty"` + ReadyWorkerReplicas *int32 `json:"readyWorkerReplicas,omitempty"` + AvailableWorkerReplicas *int32 `json:"availableWorkerReplicas,omitempty"` + DesiredWorkerReplicas *int32 `json:"desiredWorkerReplicas,omitempty"` + MinWorkerReplicas *int32 `json:"minWorkerReplicas,omitempty"` + MaxWorkerReplicas *int32 `json:"maxWorkerReplicas,omitempty"` + ObservedGeneration *int64 `json:"observedGeneration,omitempty"` } // RayClusterStatusApplyConfiguration constructs an declarative configuration of the RayClusterStatus type for use with @@ -41,38 +44,6 @@ func (b *RayClusterStatusApplyConfiguration) WithState(value v1.ClusterState) *R return b } -// WithAvailableWorkerReplicas sets the AvailableWorkerReplicas field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the AvailableWorkerReplicas field is set to the value of the last call. -func (b *RayClusterStatusApplyConfiguration) WithAvailableWorkerReplicas(value int32) *RayClusterStatusApplyConfiguration { - b.AvailableWorkerReplicas = &value - return b -} - -// WithDesiredWorkerReplicas sets the DesiredWorkerReplicas field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the DesiredWorkerReplicas field is set to the value of the last call. 
-func (b *RayClusterStatusApplyConfiguration) WithDesiredWorkerReplicas(value int32) *RayClusterStatusApplyConfiguration { - b.DesiredWorkerReplicas = &value - return b -} - -// WithMinWorkerReplicas sets the MinWorkerReplicas field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MinWorkerReplicas field is set to the value of the last call. -func (b *RayClusterStatusApplyConfiguration) WithMinWorkerReplicas(value int32) *RayClusterStatusApplyConfiguration { - b.MinWorkerReplicas = &value - return b -} - -// WithMaxWorkerReplicas sets the MaxWorkerReplicas field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the MaxWorkerReplicas field is set to the value of the last call. -func (b *RayClusterStatusApplyConfiguration) WithMaxWorkerReplicas(value int32) *RayClusterStatusApplyConfiguration { - b.MaxWorkerReplicas = &value - return b -} - // WithDesiredCPU sets the DesiredCPU field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the DesiredCPU field is set to the value of the last call. @@ -113,6 +84,20 @@ func (b *RayClusterStatusApplyConfiguration) WithLastUpdateTime(value metav1.Tim return b } +// WithStateTransitionTimes puts the entries into the StateTransitionTimes field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the StateTransitionTimes field, +// overwriting an existing map entries in StateTransitionTimes field with the same key. 
+func (b *RayClusterStatusApplyConfiguration) WithStateTransitionTimes(entries map[v1.ClusterState]*metav1.Time) *RayClusterStatusApplyConfiguration { + if b.StateTransitionTimes == nil && len(entries) > 0 { + b.StateTransitionTimes = make(map[v1.ClusterState]*metav1.Time, len(entries)) + } + for k, v := range entries { + b.StateTransitionTimes[k] = v + } + return b +} + // WithEndpoints puts the entries into the Endpoints field in the declarative configuration // and returns the receiver, so that objects can be build by chaining "With" function invocations. // If called multiple times, the entries provided by each call will be put on the Endpoints field, @@ -143,6 +128,56 @@ func (b *RayClusterStatusApplyConfiguration) WithReason(value string) *RayCluste return b } +// WithConditions adds the given value to the Conditions field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Conditions field. +func (b *RayClusterStatusApplyConfiguration) WithConditions(values ...metav1.Condition) *RayClusterStatusApplyConfiguration { + for i := range values { + b.Conditions = append(b.Conditions, values[i]) + } + return b +} + +// WithReadyWorkerReplicas sets the ReadyWorkerReplicas field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ReadyWorkerReplicas field is set to the value of the last call. 
+func (b *RayClusterStatusApplyConfiguration) WithReadyWorkerReplicas(value int32) *RayClusterStatusApplyConfiguration { + b.ReadyWorkerReplicas = &value + return b +} + +// WithAvailableWorkerReplicas sets the AvailableWorkerReplicas field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the AvailableWorkerReplicas field is set to the value of the last call. +func (b *RayClusterStatusApplyConfiguration) WithAvailableWorkerReplicas(value int32) *RayClusterStatusApplyConfiguration { + b.AvailableWorkerReplicas = &value + return b +} + +// WithDesiredWorkerReplicas sets the DesiredWorkerReplicas field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the DesiredWorkerReplicas field is set to the value of the last call. +func (b *RayClusterStatusApplyConfiguration) WithDesiredWorkerReplicas(value int32) *RayClusterStatusApplyConfiguration { + b.DesiredWorkerReplicas = &value + return b +} + +// WithMinWorkerReplicas sets the MinWorkerReplicas field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the MinWorkerReplicas field is set to the value of the last call. +func (b *RayClusterStatusApplyConfiguration) WithMinWorkerReplicas(value int32) *RayClusterStatusApplyConfiguration { + b.MinWorkerReplicas = &value + return b +} + +// WithMaxWorkerReplicas sets the MaxWorkerReplicas field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the MaxWorkerReplicas field is set to the value of the last call. 
+func (b *RayClusterStatusApplyConfiguration) WithMaxWorkerReplicas(value int32) *RayClusterStatusApplyConfiguration { + b.MaxWorkerReplicas = &value + return b +} + // WithObservedGeneration sets the ObservedGeneration field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ObservedGeneration field is set to the value of the last call. diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayjobspec.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayjobspec.go index 489c68ad363..f453176236f 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayjobspec.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayjobspec.go @@ -10,21 +10,25 @@ import ( // RayJobSpecApplyConfiguration represents an declarative configuration of the RayJobSpec type for use // with apply. type RayJobSpecApplyConfiguration struct { - Entrypoint *string `json:"entrypoint,omitempty"` - Metadata map[string]string `json:"metadata,omitempty"` - RuntimeEnvYAML *string `json:"runtimeEnvYAML,omitempty"` - JobId *string `json:"jobId,omitempty"` - ShutdownAfterJobFinishes *bool `json:"shutdownAfterJobFinishes,omitempty"` - TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"` ActiveDeadlineSeconds *int32 `json:"activeDeadlineSeconds,omitempty"` + BackoffLimit *int32 `json:"backoffLimit,omitempty"` RayClusterSpec *RayClusterSpecApplyConfiguration `json:"rayClusterSpec,omitempty"` + SubmitterPodTemplate *corev1.PodTemplateSpecApplyConfiguration `json:"submitterPodTemplate,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` ClusterSelector map[string]string `json:"clusterSelector,omitempty"` + SubmitterConfig *SubmitterConfigApplyConfiguration `json:"submitterConfig,omitempty"` + ManagedBy *string `json:"managedBy,omitempty"` + DeletionPolicy *rayv1.DeletionPolicy `json:"deletionPolicy,omitempty"` + 
Entrypoint *string `json:"entrypoint,omitempty"` + RuntimeEnvYAML *string `json:"runtimeEnvYAML,omitempty"` + JobId *string `json:"jobId,omitempty"` SubmissionMode *rayv1.JobSubmissionMode `json:"submissionMode,omitempty"` - Suspend *bool `json:"suspend,omitempty"` - SubmitterPodTemplate *corev1.PodTemplateSpecApplyConfiguration `json:"submitterPodTemplate,omitempty"` + EntrypointResources *string `json:"entrypointResources,omitempty"` EntrypointNumCpus *float32 `json:"entrypointNumCpus,omitempty"` EntrypointNumGpus *float32 `json:"entrypointNumGpus,omitempty"` - EntrypointResources *string `json:"entrypointResources,omitempty"` + TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"` + ShutdownAfterJobFinishes *bool `json:"shutdownAfterJobFinishes,omitempty"` + Suspend *bool `json:"suspend,omitempty"` } // RayJobSpecApplyConfiguration constructs an declarative configuration of the RayJobSpec type for use with @@ -33,11 +37,35 @@ func RayJobSpec() *RayJobSpecApplyConfiguration { return &RayJobSpecApplyConfiguration{} } -// WithEntrypoint sets the Entrypoint field in the declarative configuration to the given value +// WithActiveDeadlineSeconds sets the ActiveDeadlineSeconds field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Entrypoint field is set to the value of the last call. -func (b *RayJobSpecApplyConfiguration) WithEntrypoint(value string) *RayJobSpecApplyConfiguration { - b.Entrypoint = &value +// If called multiple times, the ActiveDeadlineSeconds field is set to the value of the last call. 
+func (b *RayJobSpecApplyConfiguration) WithActiveDeadlineSeconds(value int32) *RayJobSpecApplyConfiguration { + b.ActiveDeadlineSeconds = &value + return b +} + +// WithBackoffLimit sets the BackoffLimit field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the BackoffLimit field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithBackoffLimit(value int32) *RayJobSpecApplyConfiguration { + b.BackoffLimit = &value + return b +} + +// WithRayClusterSpec sets the RayClusterSpec field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the RayClusterSpec field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithRayClusterSpec(value *RayClusterSpecApplyConfiguration) *RayJobSpecApplyConfiguration { + b.RayClusterSpec = value + return b +} + +// WithSubmitterPodTemplate sets the SubmitterPodTemplate field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the SubmitterPodTemplate field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithSubmitterPodTemplate(value *corev1.PodTemplateSpecApplyConfiguration) *RayJobSpecApplyConfiguration { + b.SubmitterPodTemplate = value return b } @@ -55,65 +83,65 @@ func (b *RayJobSpecApplyConfiguration) WithMetadata(entries map[string]string) * return b } -// WithRuntimeEnvYAML sets the RuntimeEnvYAML field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RuntimeEnvYAML field is set to the value of the last call. 
-func (b *RayJobSpecApplyConfiguration) WithRuntimeEnvYAML(value string) *RayJobSpecApplyConfiguration { - b.RuntimeEnvYAML = &value +// WithClusterSelector puts the entries into the ClusterSelector field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the ClusterSelector field, +// overwriting an existing map entries in ClusterSelector field with the same key. +func (b *RayJobSpecApplyConfiguration) WithClusterSelector(entries map[string]string) *RayJobSpecApplyConfiguration { + if b.ClusterSelector == nil && len(entries) > 0 { + b.ClusterSelector = make(map[string]string, len(entries)) + } + for k, v := range entries { + b.ClusterSelector[k] = v + } return b } -// WithJobId sets the JobId field in the declarative configuration to the given value +// WithSubmitterConfig sets the SubmitterConfig field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the JobId field is set to the value of the last call. -func (b *RayJobSpecApplyConfiguration) WithJobId(value string) *RayJobSpecApplyConfiguration { - b.JobId = &value +// If called multiple times, the SubmitterConfig field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithSubmitterConfig(value *SubmitterConfigApplyConfiguration) *RayJobSpecApplyConfiguration { + b.SubmitterConfig = value return b } -// WithShutdownAfterJobFinishes sets the ShutdownAfterJobFinishes field in the declarative configuration to the given value +// WithManagedBy sets the ManagedBy field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. 
-// If called multiple times, the ShutdownAfterJobFinishes field is set to the value of the last call. -func (b *RayJobSpecApplyConfiguration) WithShutdownAfterJobFinishes(value bool) *RayJobSpecApplyConfiguration { - b.ShutdownAfterJobFinishes = &value +// If called multiple times, the ManagedBy field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithManagedBy(value string) *RayJobSpecApplyConfiguration { + b.ManagedBy = &value return b } -// WithTTLSecondsAfterFinished sets the TTLSecondsAfterFinished field in the declarative configuration to the given value +// WithDeletionPolicy sets the DeletionPolicy field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the TTLSecondsAfterFinished field is set to the value of the last call. -func (b *RayJobSpecApplyConfiguration) WithTTLSecondsAfterFinished(value int32) *RayJobSpecApplyConfiguration { - b.TTLSecondsAfterFinished = &value +// If called multiple times, the DeletionPolicy field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithDeletionPolicy(value rayv1.DeletionPolicy) *RayJobSpecApplyConfiguration { + b.DeletionPolicy = &value return b } -// WithActiveDeadlineSeconds sets the ActiveDeadlineSeconds field in the declarative configuration to the given value +// WithEntrypoint sets the Entrypoint field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ActiveDeadlineSeconds field is set to the value of the last call. -func (b *RayJobSpecApplyConfiguration) WithActiveDeadlineSeconds(value int32) *RayJobSpecApplyConfiguration { - b.ActiveDeadlineSeconds = &value +// If called multiple times, the Entrypoint field is set to the value of the last call. 
+func (b *RayJobSpecApplyConfiguration) WithEntrypoint(value string) *RayJobSpecApplyConfiguration { + b.Entrypoint = &value return b } -// WithRayClusterSpec sets the RayClusterSpec field in the declarative configuration to the given value +// WithRuntimeEnvYAML sets the RuntimeEnvYAML field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RayClusterSpec field is set to the value of the last call. -func (b *RayJobSpecApplyConfiguration) WithRayClusterSpec(value *RayClusterSpecApplyConfiguration) *RayJobSpecApplyConfiguration { - b.RayClusterSpec = value +// If called multiple times, the RuntimeEnvYAML field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithRuntimeEnvYAML(value string) *RayJobSpecApplyConfiguration { + b.RuntimeEnvYAML = &value return b } -// WithClusterSelector puts the entries into the ClusterSelector field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the ClusterSelector field, -// overwriting an existing map entries in ClusterSelector field with the same key. -func (b *RayJobSpecApplyConfiguration) WithClusterSelector(entries map[string]string) *RayJobSpecApplyConfiguration { - if b.ClusterSelector == nil && len(entries) > 0 { - b.ClusterSelector = make(map[string]string, len(entries)) - } - for k, v := range entries { - b.ClusterSelector[k] = v - } +// WithJobId sets the JobId field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the JobId field is set to the value of the last call. 
+func (b *RayJobSpecApplyConfiguration) WithJobId(value string) *RayJobSpecApplyConfiguration { + b.JobId = &value return b } @@ -125,19 +153,11 @@ func (b *RayJobSpecApplyConfiguration) WithSubmissionMode(value rayv1.JobSubmiss return b } -// WithSuspend sets the Suspend field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Suspend field is set to the value of the last call. -func (b *RayJobSpecApplyConfiguration) WithSuspend(value bool) *RayJobSpecApplyConfiguration { - b.Suspend = &value - return b -} - -// WithSubmitterPodTemplate sets the SubmitterPodTemplate field in the declarative configuration to the given value +// WithEntrypointResources sets the EntrypointResources field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the SubmitterPodTemplate field is set to the value of the last call. -func (b *RayJobSpecApplyConfiguration) WithSubmitterPodTemplate(value *corev1.PodTemplateSpecApplyConfiguration) *RayJobSpecApplyConfiguration { - b.SubmitterPodTemplate = value +// If called multiple times, the EntrypointResources field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithEntrypointResources(value string) *RayJobSpecApplyConfiguration { + b.EntrypointResources = &value return b } @@ -157,10 +177,26 @@ func (b *RayJobSpecApplyConfiguration) WithEntrypointNumGpus(value float32) *Ray return b } -// WithEntrypointResources sets the EntrypointResources field in the declarative configuration to the given value +// WithTTLSecondsAfterFinished sets the TTLSecondsAfterFinished field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. 
-// If called multiple times, the EntrypointResources field is set to the value of the last call. -func (b *RayJobSpecApplyConfiguration) WithEntrypointResources(value string) *RayJobSpecApplyConfiguration { - b.EntrypointResources = &value +// If called multiple times, the TTLSecondsAfterFinished field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithTTLSecondsAfterFinished(value int32) *RayJobSpecApplyConfiguration { + b.TTLSecondsAfterFinished = &value + return b +} + +// WithShutdownAfterJobFinishes sets the ShutdownAfterJobFinishes field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ShutdownAfterJobFinishes field is set to the value of the last call. +func (b *RayJobSpecApplyConfiguration) WithShutdownAfterJobFinishes(value bool) *RayJobSpecApplyConfiguration { + b.ShutdownAfterJobFinishes = &value + return b +} + +// WithSuspend sets the Suspend field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Suspend field is set to the value of the last call. 
+func (b *RayJobSpecApplyConfiguration) WithSuspend(value bool) *RayJobSpecApplyConfiguration { + b.Suspend = &value return b } diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayjobstatus.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayjobstatus.go index 596cf612fb5..eddd5fa2c3c 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayjobstatus.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayjobstatus.go @@ -19,6 +19,8 @@ type RayJobStatusApplyConfiguration struct { Message *string `json:"message,omitempty"` StartTime *metav1.Time `json:"startTime,omitempty"` EndTime *metav1.Time `json:"endTime,omitempty"` + Succeeded *int32 `json:"succeeded,omitempty"` + Failed *int32 `json:"failed,omitempty"` RayClusterStatus *RayClusterStatusApplyConfiguration `json:"rayClusterStatus,omitempty"` ObservedGeneration *int64 `json:"observedGeneration,omitempty"` } @@ -101,6 +103,22 @@ func (b *RayJobStatusApplyConfiguration) WithEndTime(value metav1.Time) *RayJobS return b } +// WithSucceeded sets the Succeeded field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Succeeded field is set to the value of the last call. +func (b *RayJobStatusApplyConfiguration) WithSucceeded(value int32) *RayJobStatusApplyConfiguration { + b.Succeeded = &value + return b +} + +// WithFailed sets the Failed field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Failed field is set to the value of the last call. 
+func (b *RayJobStatusApplyConfiguration) WithFailed(value int32) *RayJobStatusApplyConfiguration { + b.Failed = &value + return b +} + // WithRayClusterStatus sets the RayClusterStatus field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the RayClusterStatus field is set to the value of the last call. diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go index 066b0f97418..f51872bdf77 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go @@ -3,17 +3,19 @@ package v1 import ( - corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" ) // RayServiceSpecApplyConfiguration represents an declarative configuration of the RayServiceSpec type for use // with apply. type RayServiceSpecApplyConfiguration struct { - ServeConfigV2 *string `json:"serveConfigV2,omitempty"` - RayClusterSpec *RayClusterSpecApplyConfiguration `json:"rayClusterConfig,omitempty"` - ServiceUnhealthySecondThreshold *int32 `json:"serviceUnhealthySecondThreshold,omitempty"` - DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` - ServeService *corev1.Service `json:"serveService,omitempty"` + ServiceUnhealthySecondThreshold *int32 `json:"serviceUnhealthySecondThreshold,omitempty"` + DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` + ServeService *v1.Service `json:"serveService,omitempty"` + UpgradeStrategy *RayServiceUpgradeStrategyApplyConfiguration `json:"upgradeStrategy,omitempty"` + ServeConfigV2 *string `json:"serveConfigV2,omitempty"` + RayClusterSpec *RayClusterSpecApplyConfiguration `json:"rayClusterConfig,omitempty"` + ExcludeHeadPodFromServeSvc *bool 
`json:"excludeHeadPodFromServeSvc,omitempty"` } // RayServiceSpecApplyConfiguration constructs an declarative configuration of the RayServiceSpec type for use with @@ -22,22 +24,6 @@ func RayServiceSpec() *RayServiceSpecApplyConfiguration { return &RayServiceSpecApplyConfiguration{} } -// WithServeConfigV2 sets the ServeConfigV2 field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ServeConfigV2 field is set to the value of the last call. -func (b *RayServiceSpecApplyConfiguration) WithServeConfigV2(value string) *RayServiceSpecApplyConfiguration { - b.ServeConfigV2 = &value - return b -} - -// WithRayClusterSpec sets the RayClusterSpec field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the RayClusterSpec field is set to the value of the last call. -func (b *RayServiceSpecApplyConfiguration) WithRayClusterSpec(value *RayClusterSpecApplyConfiguration) *RayServiceSpecApplyConfiguration { - b.RayClusterSpec = value - return b -} - // WithServiceUnhealthySecondThreshold sets the ServiceUnhealthySecondThreshold field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ServiceUnhealthySecondThreshold field is set to the value of the last call. @@ -57,7 +43,39 @@ func (b *RayServiceSpecApplyConfiguration) WithDeploymentUnhealthySecondThreshol // WithServeService sets the ServeService field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ServeService field is set to the value of the last call. 
-func (b *RayServiceSpecApplyConfiguration) WithServeService(value corev1.Service) *RayServiceSpecApplyConfiguration { +func (b *RayServiceSpecApplyConfiguration) WithServeService(value v1.Service) *RayServiceSpecApplyConfiguration { b.ServeService = &value return b } + +// WithUpgradeStrategy sets the UpgradeStrategy field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the UpgradeStrategy field is set to the value of the last call. +func (b *RayServiceSpecApplyConfiguration) WithUpgradeStrategy(value *RayServiceUpgradeStrategyApplyConfiguration) *RayServiceSpecApplyConfiguration { + b.UpgradeStrategy = value + return b +} + +// WithServeConfigV2 sets the ServeConfigV2 field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ServeConfigV2 field is set to the value of the last call. +func (b *RayServiceSpecApplyConfiguration) WithServeConfigV2(value string) *RayServiceSpecApplyConfiguration { + b.ServeConfigV2 = &value + return b +} + +// WithRayClusterSpec sets the RayClusterSpec field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the RayClusterSpec field is set to the value of the last call. +func (b *RayServiceSpecApplyConfiguration) WithRayClusterSpec(value *RayClusterSpecApplyConfiguration) *RayServiceSpecApplyConfiguration { + b.RayClusterSpec = value + return b +} + +// WithExcludeHeadPodFromServeSvc sets the ExcludeHeadPodFromServeSvc field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
+// If called multiple times, the ExcludeHeadPodFromServeSvc field is set to the value of the last call. +func (b *RayServiceSpecApplyConfiguration) WithExcludeHeadPodFromServeSvc(value bool) *RayServiceSpecApplyConfiguration { + b.ExcludeHeadPodFromServeSvc = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicestatuses.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicestatuses.go index 53941b8349d..864691909c7 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicestatuses.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicestatuses.go @@ -4,18 +4,18 @@ package v1 import ( rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // RayServiceStatusesApplyConfiguration represents an declarative configuration of the RayServiceStatuses type for use // with apply. type RayServiceStatusesApplyConfiguration struct { + LastUpdateTime *v1.Time `json:"lastUpdateTime,omitempty"` + ServiceStatus *rayv1.ServiceStatus `json:"serviceStatus,omitempty"` ActiveServiceStatus *RayServiceStatusApplyConfiguration `json:"activeServiceStatus,omitempty"` PendingServiceStatus *RayServiceStatusApplyConfiguration `json:"pendingServiceStatus,omitempty"` - ServiceStatus *rayv1.ServiceStatus `json:"serviceStatus,omitempty"` NumServeEndpoints *int32 `json:"numServeEndpoints,omitempty"` ObservedGeneration *int64 `json:"observedGeneration,omitempty"` - LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` } // RayServiceStatusesApplyConfiguration constructs an declarative configuration of the RayServiceStatuses type for use with @@ -24,6 +24,22 @@ func RayServiceStatuses() *RayServiceStatusesApplyConfiguration { return &RayServiceStatusesApplyConfiguration{} } +// WithLastUpdateTime sets the LastUpdateTime field in the declarative configuration to the given value +// and returns the receiver, so 
that objects can be built by chaining "With" function invocations. +// If called multiple times, the LastUpdateTime field is set to the value of the last call. +func (b *RayServiceStatusesApplyConfiguration) WithLastUpdateTime(value v1.Time) *RayServiceStatusesApplyConfiguration { + b.LastUpdateTime = &value + return b +} + +// WithServiceStatus sets the ServiceStatus field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ServiceStatus field is set to the value of the last call. +func (b *RayServiceStatusesApplyConfiguration) WithServiceStatus(value rayv1.ServiceStatus) *RayServiceStatusesApplyConfiguration { + b.ServiceStatus = &value + return b +} + // WithActiveServiceStatus sets the ActiveServiceStatus field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ActiveServiceStatus field is set to the value of the last call. @@ -40,14 +56,6 @@ func (b *RayServiceStatusesApplyConfiguration) WithPendingServiceStatus(value *R return b } -// WithServiceStatus sets the ServiceStatus field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ServiceStatus field is set to the value of the last call. -func (b *RayServiceStatusesApplyConfiguration) WithServiceStatus(value rayv1.ServiceStatus) *RayServiceStatusesApplyConfiguration { - b.ServiceStatus = &value - return b -} - // WithNumServeEndpoints sets the NumServeEndpoints field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the NumServeEndpoints field is set to the value of the last call. 
@@ -63,11 +71,3 @@ func (b *RayServiceStatusesApplyConfiguration) WithObservedGeneration(value int6 b.ObservedGeneration = &value return b } - -// WithLastUpdateTime sets the LastUpdateTime field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the LastUpdateTime field is set to the value of the last call. -func (b *RayServiceStatusesApplyConfiguration) WithLastUpdateTime(value metav1.Time) *RayServiceStatusesApplyConfiguration { - b.LastUpdateTime = &value - return b -} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradespec.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradespec.go new file mode 100644 index 00000000000..758e49dd2ab --- /dev/null +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradespec.go @@ -0,0 +1,27 @@ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +// RayServiceUpgradeSpecApplyConfiguration represents an declarative configuration of the RayServiceUpgradeSpec type for use +// with apply. +type RayServiceUpgradeSpecApplyConfiguration struct { + Type *v1.RayServiceUpgradeStrategy `json:"type,omitempty"` +} + +// RayServiceUpgradeSpecApplyConfiguration constructs an declarative configuration of the RayServiceUpgradeSpec type for use with +// apply. +func RayServiceUpgradeSpec() *RayServiceUpgradeSpecApplyConfiguration { + return &RayServiceUpgradeSpecApplyConfiguration{} +} + +// WithType sets the Type field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Type field is set to the value of the last call. 
+func (b *RayServiceUpgradeSpecApplyConfiguration) WithType(value v1.RayServiceUpgradeStrategy) *RayServiceUpgradeSpecApplyConfiguration { + b.Type = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go new file mode 100644 index 00000000000..ecf111103e7 --- /dev/null +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go @@ -0,0 +1,27 @@ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +// RayServiceUpgradeStrategyApplyConfiguration represents an declarative configuration of the RayServiceUpgradeStrategy type for use +// with apply. +type RayServiceUpgradeStrategyApplyConfiguration struct { + Type *v1.RayServiceUpgradeType `json:"type,omitempty"` +} + +// RayServiceUpgradeStrategyApplyConfiguration constructs an declarative configuration of the RayServiceUpgradeStrategy type for use with +// apply. +func RayServiceUpgradeStrategy() *RayServiceUpgradeStrategyApplyConfiguration { + return &RayServiceUpgradeStrategyApplyConfiguration{} +} + +// WithType sets the Type field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Type field is set to the value of the last call. 
+func (b *RayServiceUpgradeStrategyApplyConfiguration) WithType(value v1.RayServiceUpgradeType) *RayServiceUpgradeStrategyApplyConfiguration { + b.Type = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rediscredential.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rediscredential.go new file mode 100644 index 00000000000..8a99a46eeed --- /dev/null +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rediscredential.go @@ -0,0 +1,36 @@ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "k8s.io/api/core/v1" +) + +// RedisCredentialApplyConfiguration represents an declarative configuration of the RedisCredential type for use +// with apply. +type RedisCredentialApplyConfiguration struct { + ValueFrom *v1.EnvVarSource `json:"valueFrom,omitempty"` + Value *string `json:"value,omitempty"` +} + +// RedisCredentialApplyConfiguration constructs an declarative configuration of the RedisCredential type for use with +// apply. +func RedisCredential() *RedisCredentialApplyConfiguration { + return &RedisCredentialApplyConfiguration{} +} + +// WithValueFrom sets the ValueFrom field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ValueFrom field is set to the value of the last call. +func (b *RedisCredentialApplyConfiguration) WithValueFrom(value v1.EnvVarSource) *RedisCredentialApplyConfiguration { + b.ValueFrom = &value + return b +} + +// WithValue sets the Value field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Value field is set to the value of the last call. 
+func (b *RedisCredentialApplyConfiguration) WithValue(value string) *RedisCredentialApplyConfiguration { + b.Value = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/servedeploymentstatus.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/servedeploymentstatus.go index 3c0b8a61282..bc45191a953 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/servedeploymentstatus.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/servedeploymentstatus.go @@ -9,9 +9,9 @@ import ( // ServeDeploymentStatusApplyConfiguration represents an declarative configuration of the ServeDeploymentStatus type for use // with apply. type ServeDeploymentStatusApplyConfiguration struct { + HealthLastUpdateTime *v1.Time `json:"healthLastUpdateTime,omitempty"` Status *string `json:"status,omitempty"` Message *string `json:"message,omitempty"` - HealthLastUpdateTime *v1.Time `json:"healthLastUpdateTime,omitempty"` } // ServeDeploymentStatusApplyConfiguration constructs an declarative configuration of the ServeDeploymentStatus type for use with @@ -20,6 +20,14 @@ func ServeDeploymentStatus() *ServeDeploymentStatusApplyConfiguration { return &ServeDeploymentStatusApplyConfiguration{} } +// WithHealthLastUpdateTime sets the HealthLastUpdateTime field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the HealthLastUpdateTime field is set to the value of the last call. +func (b *ServeDeploymentStatusApplyConfiguration) WithHealthLastUpdateTime(value v1.Time) *ServeDeploymentStatusApplyConfiguration { + b.HealthLastUpdateTime = &value + return b +} + // WithStatus sets the Status field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Status field is set to the value of the last call. 
@@ -35,11 +43,3 @@ func (b *ServeDeploymentStatusApplyConfiguration) WithMessage(value string) *Ser b.Message = &value return b } - -// WithHealthLastUpdateTime sets the HealthLastUpdateTime field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the HealthLastUpdateTime field is set to the value of the last call. -func (b *ServeDeploymentStatusApplyConfiguration) WithHealthLastUpdateTime(value v1.Time) *ServeDeploymentStatusApplyConfiguration { - b.HealthLastUpdateTime = &value - return b -} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/submitterconfig.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/submitterconfig.go new file mode 100644 index 00000000000..ab7a53f355a --- /dev/null +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/submitterconfig.go @@ -0,0 +1,23 @@ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +// SubmitterConfigApplyConfiguration represents an declarative configuration of the SubmitterConfig type for use +// with apply. +type SubmitterConfigApplyConfiguration struct { + BackoffLimit *int32 `json:"backoffLimit,omitempty"` +} + +// SubmitterConfigApplyConfiguration constructs an declarative configuration of the SubmitterConfig type for use with +// apply. +func SubmitterConfig() *SubmitterConfigApplyConfiguration { + return &SubmitterConfigApplyConfiguration{} +} + +// WithBackoffLimit sets the BackoffLimit field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the BackoffLimit field is set to the value of the last call. 
+func (b *SubmitterConfigApplyConfiguration) WithBackoffLimit(value int32) *SubmitterConfigApplyConfiguration { + b.BackoffLimit = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/workergroupspec.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/workergroupspec.go index f4f4c7777cd..fa95cc17494 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/workergroupspec.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/workergroupspec.go @@ -9,14 +9,16 @@ import ( // WorkerGroupSpecApplyConfiguration represents an declarative configuration of the WorkerGroupSpec type for use // with apply. type WorkerGroupSpecApplyConfiguration struct { - GroupName *string `json:"groupName,omitempty"` - Replicas *int32 `json:"replicas,omitempty"` - MinReplicas *int32 `json:"minReplicas,omitempty"` - MaxReplicas *int32 `json:"maxReplicas,omitempty"` - NumOfHosts *int32 `json:"numOfHosts,omitempty"` - RayStartParams map[string]string `json:"rayStartParams,omitempty"` - Template *v1.PodTemplateSpecApplyConfiguration `json:"template,omitempty"` - ScaleStrategy *ScaleStrategyApplyConfiguration `json:"scaleStrategy,omitempty"` + Suspend *bool `json:"suspend,omitempty"` + GroupName *string `json:"groupName,omitempty"` + Replicas *int32 `json:"replicas,omitempty"` + MinReplicas *int32 `json:"minReplicas,omitempty"` + MaxReplicas *int32 `json:"maxReplicas,omitempty"` + IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"` + RayStartParams map[string]string `json:"rayStartParams,omitempty"` + Template *v1.PodTemplateSpecApplyConfiguration `json:"template,omitempty"` + ScaleStrategy *ScaleStrategyApplyConfiguration `json:"scaleStrategy,omitempty"` + NumOfHosts *int32 `json:"numOfHosts,omitempty"` } // WorkerGroupSpecApplyConfiguration constructs an declarative configuration of the WorkerGroupSpec type for use with @@ -25,6 +27,14 @@ func WorkerGroupSpec() *WorkerGroupSpecApplyConfiguration { return 
&WorkerGroupSpecApplyConfiguration{} } +// WithSuspend sets the Suspend field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Suspend field is set to the value of the last call. +func (b *WorkerGroupSpecApplyConfiguration) WithSuspend(value bool) *WorkerGroupSpecApplyConfiguration { + b.Suspend = &value + return b +} + // WithGroupName sets the GroupName field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the GroupName field is set to the value of the last call. @@ -57,11 +67,11 @@ func (b *WorkerGroupSpecApplyConfiguration) WithMaxReplicas(value int32) *Worker return b } -// WithNumOfHosts sets the NumOfHosts field in the declarative configuration to the given value +// WithIdleTimeoutSeconds sets the IdleTimeoutSeconds field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the NumOfHosts field is set to the value of the last call. -func (b *WorkerGroupSpecApplyConfiguration) WithNumOfHosts(value int32) *WorkerGroupSpecApplyConfiguration { - b.NumOfHosts = &value +// If called multiple times, the IdleTimeoutSeconds field is set to the value of the last call. +func (b *WorkerGroupSpecApplyConfiguration) WithIdleTimeoutSeconds(value int32) *WorkerGroupSpecApplyConfiguration { + b.IdleTimeoutSeconds = &value return b } @@ -94,3 +104,11 @@ func (b *WorkerGroupSpecApplyConfiguration) WithScaleStrategy(value *ScaleStrate b.ScaleStrategy = value return b } + +// WithNumOfHosts sets the NumOfHosts field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
+// If called multiple times, the NumOfHosts field is set to the value of the last call. +func (b *WorkerGroupSpecApplyConfiguration) WithNumOfHosts(value int32) *WorkerGroupSpecApplyConfiguration { + b.NumOfHosts = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/utils.go b/ray-operator/pkg/client/applyconfiguration/utils.go index 35cc7873662..40fb4ce55d4 100644 --- a/ray-operator/pkg/client/applyconfiguration/utils.go +++ b/ray-operator/pkg/client/applyconfiguration/utils.go @@ -17,6 +17,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &rayv1.AppStatusApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("AutoscalerOptions"): return &rayv1.AutoscalerOptionsApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("GcsFaultToleranceOptions"): + return &rayv1.GcsFaultToleranceOptionsApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("HeadGroupSpec"): return &rayv1.HeadGroupSpecApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("HeadInfo"): @@ -41,10 +43,16 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &rayv1.RayServiceStatusApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("RayServiceStatuses"): return &rayv1.RayServiceStatusesApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("RayServiceUpgradeStrategy"): + return &rayv1.RayServiceUpgradeStrategyApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("RedisCredential"): + return &rayv1.RedisCredentialApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("ScaleStrategy"): return &rayv1.ScaleStrategyApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("ServeDeploymentStatus"): return &rayv1.ServeDeploymentStatusApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("SubmitterConfig"): + return &rayv1.SubmitterConfigApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("WorkerGroupSpec"): return &rayv1.WorkerGroupSpecApplyConfiguration{} diff --git 
a/ray-operator/pkg/client/informers/externalversions/factory.go b/ray-operator/pkg/client/informers/externalversions/factory.go index bf77b0ee136..4c12d6df3ee 100644 --- a/ray-operator/pkg/client/informers/externalversions/factory.go +++ b/ray-operator/pkg/client/informers/externalversions/factory.go @@ -26,6 +26,7 @@ type sharedInformerFactory struct { lock sync.Mutex defaultResync time.Duration customResync map[reflect.Type]time.Duration + transform cache.TransformFunc informers map[reflect.Type]cache.SharedIndexInformer // startedInformers is used for tracking which informers have been started. @@ -64,6 +65,14 @@ func WithNamespace(namespace string) SharedInformerOption { } } +// WithTransform sets a transform on all informers. +func WithTransform(transform cache.TransformFunc) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.transform = transform + return factory + } +} + // NewSharedInformerFactory constructs a new instance of sharedInformerFactory for all namespaces. 
func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory { return NewSharedInformerFactoryWithOptions(client, defaultResync) @@ -168,6 +177,7 @@ func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internal } informer = newFunc(f.client, resyncPeriod) + informer.SetTransform(f.transform) f.informers[informerType] = informer return informer diff --git a/ray-operator/pkg/features/features.go b/ray-operator/pkg/features/features.go new file mode 100644 index 00000000000..3df4422d600 --- /dev/null +++ b/ray-operator/pkg/features/features.go @@ -0,0 +1,56 @@ +package features + +import ( + "testing" + + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/util/runtime" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/component-base/featuregate" + featuregatetesting "k8s.io/component-base/featuregate/testing" +) + +const ( + // owner: @rueian @kevin85421 @andrewsykim + // rep: https://github.com/ray-project/enhancements/pull/54 + // alpha: v1.2 + // + // Enables new conditions in RayCluster status + RayClusterStatusConditions featuregate.Feature = "RayClusterStatusConditions" + + // owner: @andrewsykim + // rep: N/A + // alpha: v1.3 + // + // Enables new deletion policy API in RayJob + RayJobDeletionPolicy featuregate.Feature = "RayJobDeletionPolicy" +) + +func init() { + runtime.Must(utilfeature.DefaultMutableFeatureGate.Add(defaultFeatureGates)) +} + +var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ + RayClusterStatusConditions: {Default: true, PreRelease: featuregate.Beta}, + RayJobDeletionPolicy: {Default: false, PreRelease: featuregate.Alpha}, +} + +// SetFeatureGateDuringTest is a helper method to override feature gates in tests. 
+func SetFeatureGateDuringTest(tb testing.TB, f featuregate.Feature, value bool) func() {
+	return featuregatetesting.SetFeatureGateDuringTest(tb, utilfeature.DefaultFeatureGate, f, value)
+}
+
+// Enabled is a helper for `utilfeature.DefaultFeatureGate.Enabled()`
+func Enabled(f featuregate.Feature) bool {
+	return utilfeature.DefaultFeatureGate.Enabled(f)
+}
+
+func LogFeatureGates(log logr.Logger) {
+	features := make(map[featuregate.Feature]bool, len(defaultFeatureGates))
+	for f := range utilfeature.DefaultMutableFeatureGate.GetAll() {
+		if _, ok := defaultFeatureGates[f]; ok {
+			features[f] = Enabled(f)
+		}
+	}
+	log.Info("Loaded feature gates", "featureGates", features)
+}
diff --git a/ray-operator/pkg/utils/string_conversion.go b/ray-operator/pkg/utils/string_conversion.go
new file mode 100644
index 00000000000..39bba9214ec
--- /dev/null
+++ b/ray-operator/pkg/utils/string_conversion.go
@@ -0,0 +1,37 @@
+// Util functions for type conversion between byte array and string without copy.
+//
+// Example usage 1 (convert string to byte slice):
+//	func TakeByteSlice(bs []byte) {...}
+//
+//	func f() {
+//		s := "helloworld"
+//		TakeByteSlice(ConvertStringToByteSlice(s)) // convert string to byte slice with zero-copy
+//	}
+//
+// Example usage 2 (convert byte slice to string):
+//	func TakeString(s string) {...}
+//
+//	func f() {
+//		bytes := []byte("helloworld")
+//		TakeString(ConvertByteSliceToString(bytes)) // convert byte slice to string with zero-copy
+//	}
+
+package utils
+
+import (
+	"unsafe"
+)
+
+// ConvertByteSliceToString converts a byte slice to a string w/o copy; a nil or empty slice yields "".
+//
+// WARNING: The input byte slice is not expected to change.
+func ConvertByteSliceToString(arr []byte) string {
+	return unsafe.String(unsafe.SliceData(arr), len(arr)) // SliceData, unlike &arr[0], does not panic on an empty slice
+}
+
+// Convert a string to byte array w/o copy.
+//
+// WARNING: The returned byte slice is not expected to change.
+func ConvertStringToByteSlice(s string) (arr []byte) { + return unsafe.Slice(unsafe.StringData(s), len(s)) +} diff --git a/ray-operator/test/e2e/long_running_counter.py b/ray-operator/test/e2e/long_running_counter.py new file mode 100644 index 00000000000..7ba0b5f53d3 --- /dev/null +++ b/ray-operator/test/e2e/long_running_counter.py @@ -0,0 +1,26 @@ +import ray +import os +import time + +ray.init() + +@ray.remote +class Counter: + def __init__(self): + # Used to verify runtimeEnv + self.name = os.getenv("counter_name") + assert self.name == "test_counter" + self.counter = 0 + + def inc(self): + self.counter += 1 + + def get_counter(self): + return "{} got {}".format(self.name, self.counter) + +counter = Counter.remote() + +for _ in range(60): + ray.get(counter.inc.remote()) + print(ray.get(counter.get_counter.remote())) + time.sleep(1) diff --git a/ray-operator/test/e2e/raycluster_gcsft_test.go b/ray-operator/test/e2e/raycluster_gcsft_test.go new file mode 100644 index 00000000000..58cc892cae9 --- /dev/null +++ b/ray-operator/test/e2e/raycluster_gcsft_test.go @@ -0,0 +1,238 @@ +package e2e + +import ( + "testing" + "time" + + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + corev1ac "k8s.io/client-go/applyconfigurations/core/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" + . "github.com/ray-project/kuberay/ray-operator/test/support" +) + +func TestGcsFaultToleranceOptions(t *testing.T) { + // Each test uses a separate namespace to utilize different Redis instances + // for better isolation. 
+ testCases := []struct { + rayClusterFn func(namespace string) *rayv1ac.RayClusterApplyConfiguration + name string + redisPassword string + createSecret bool + }{ + { + name: "No Redis Password", + redisPassword: "", + rayClusterFn: func(namespace string) *rayv1ac.RayClusterApplyConfiguration { + return rayv1ac.RayCluster("raycluster-gcsft", namespace).WithSpec( + newRayClusterSpec().WithGcsFaultToleranceOptions( + rayv1ac.GcsFaultToleranceOptions(). + WithRedisAddress("redis:6379"), + ), + ) + }, + createSecret: false, + }, + { + name: "Redis Password", + redisPassword: "5241590000000000", + rayClusterFn: func(namespace string) *rayv1ac.RayClusterApplyConfiguration { + return rayv1ac.RayCluster("raycluster-gcsft", namespace).WithSpec( + newRayClusterSpec().WithGcsFaultToleranceOptions( + rayv1ac.GcsFaultToleranceOptions(). + WithRedisAddress("redis:6379"). + WithRedisPassword(rayv1ac.RedisCredential().WithValue("5241590000000000")), + ), + ) + }, + createSecret: false, + }, + { + name: "Redis Password In Secret", + redisPassword: "5241590000000000", + rayClusterFn: func(namespace string) *rayv1ac.RayClusterApplyConfiguration { + return rayv1ac.RayCluster("raycluster-gcsft", namespace).WithSpec( + newRayClusterSpec().WithGcsFaultToleranceOptions( + rayv1ac.GcsFaultToleranceOptions(). + WithRedisAddress("redis:6379"). + WithRedisPassword(rayv1ac.RedisCredential(). 
+ WithValueFrom(v1.EnvVarSource{ + SecretKeyRef: &v1.SecretKeySelector{ + LocalObjectReference: v1.LocalObjectReference{Name: "redis-password-secret"}, + Key: "password", + }, + }), + ), + ), + ) + }, + createSecret: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + test := With(t) + g := NewWithT(t) + namespace := test.NewTestNamespace() + + checkRedisDBSize := deployRedis(test, namespace.Name, tc.redisPassword) + defer g.Eventually(checkRedisDBSize, time.Second*30, time.Second).Should(BeEquivalentTo("0")) + + if tc.createSecret { + test.T().Logf("Creating Redis password secret") + _, err := test.Client().Core().CoreV1().Secrets(namespace.Name).Apply( + test.Ctx(), + corev1ac.Secret("redis-password-secret", namespace.Name). + WithStringData(map[string]string{"password": tc.redisPassword}), + TestApplyOptions, + ) + g.Expect(err).NotTo(HaveOccurred()) + } + + rayClusterAC := tc.rayClusterFn(namespace.Name) + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("Waiting for RayCluster %s/%s to become ready", rayCluster.Namespace, rayCluster.Name) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
+ Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) + + test.T().Logf("Verifying environment variables on Head Pod") + rayCluster, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Get(test.Ctx(), rayCluster.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + headPod, err := test.Client().Core().CoreV1().Pods(namespace.Name).Get(test.Ctx(), rayCluster.Status.Head.PodName, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(utils.EnvVarExists(utils.RAY_REDIS_ADDRESS, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeTrue()) + g.Expect(utils.EnvVarExists(utils.RAY_EXTERNAL_STORAGE_NS, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeTrue()) + if tc.redisPassword == "" { + g.Expect(utils.EnvVarExists(utils.REDIS_PASSWORD, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeFalse()) + } else { + g.Expect(utils.EnvVarExists(utils.REDIS_PASSWORD, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeTrue()) + } + + err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Delete(test.Ctx(), rayCluster.Name, metav1.DeleteOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + }) + } +} + +func TestGcsFaultToleranceAnnotations(t *testing.T) { + tests := []struct { + name string + storageNS string + redisPasswordEnv string + redisPasswordInRayStartParams string + }{ + { + name: "GCS FT without redis password", + storageNS: "", + redisPasswordEnv: "", + redisPasswordInRayStartParams: "", + }, + { + name: "GCS FT with redis password in ray start params", + storageNS: "", + redisPasswordEnv: "", + redisPasswordInRayStartParams: "5241590000000000", + }, + { + name: "GCS FT with redis password in ray start params referring to env", + storageNS: "", + redisPasswordEnv: "5241590000000000", + redisPasswordInRayStartParams: "$REDIS_PASSWORD", + }, + { + name: "GCS FT with storage namespace", + 
storageNS: "test-storage-ns", + redisPasswordEnv: "5241590000000000", + redisPasswordInRayStartParams: "$REDIS_PASSWORD", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + test := With(t) + g := NewWithT(t) + namespace := test.NewTestNamespace() + + redisPassword := "" + if tc.redisPasswordEnv != "" && tc.redisPasswordInRayStartParams != "" && tc.redisPasswordInRayStartParams != "$REDIS_PASSWORD" { + t.Fatalf("redisPasswordEnv and redisPasswordInRayStartParams are both set") + } + + switch { + case tc.redisPasswordEnv != "": + redisPassword = tc.redisPasswordEnv + case tc.redisPasswordInRayStartParams != "": + redisPassword = tc.redisPasswordInRayStartParams + } + + checkRedisDBSize := deployRedis(test, namespace.Name, redisPassword) + defer g.Eventually(checkRedisDBSize, time.Second*30, time.Second).Should(BeEquivalentTo("0")) + + // Prepare RayCluster ApplyConfiguration + podTemplateAC := headPodTemplateApplyConfiguration() + podTemplateAC.Spec.Containers[utils.RayContainerIndex].WithEnv( + corev1ac.EnvVar().WithName("RAY_REDIS_ADDRESS").WithValue("redis:6379"), + ) + if tc.redisPasswordEnv != "" { + podTemplateAC.Spec.Containers[utils.RayContainerIndex].WithEnv( + corev1ac.EnvVar().WithName("REDIS_PASSWORD").WithValue(tc.redisPasswordEnv), + ) + } + rayClusterAC := rayv1ac.RayCluster("raycluster-gcsft", namespace.Name).WithAnnotations( + map[string]string{utils.RayFTEnabledAnnotationKey: "true"}, + ).WithSpec( + rayClusterSpecWith( + rayv1ac.RayClusterSpec(). + WithRayVersion(GetRayVersion()). + WithHeadGroupSpec(rayv1ac.HeadGroupSpec(). + // RayStartParams are not allowed to be empty. + WithRayStartParams(map[string]string{"dashboard-host": "0.0.0.0"}). 
+ WithTemplate(podTemplateAC)), + ), + ) + if tc.storageNS != "" { + rayClusterAC.WithAnnotations(map[string]string{utils.RayExternalStorageNSAnnotationKey: tc.storageNS}) + } + if tc.redisPasswordInRayStartParams != "" { + rayClusterAC.Spec.HeadGroupSpec.WithRayStartParams(map[string]string{"redis-password": tc.redisPasswordInRayStartParams}) + } + + // Apply RayCluster + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("Waiting for RayCluster %s/%s to become ready", rayCluster.Namespace, rayCluster.Name) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) + + test.T().Logf("Verifying environment variables on Head Pod") + rayCluster, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Get(test.Ctx(), rayCluster.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + headPod, err := test.Client().Core().CoreV1().Pods(namespace.Name).Get(test.Ctx(), rayCluster.Status.Head.PodName, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(utils.EnvVarExists(utils.RAY_REDIS_ADDRESS, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeTrue()) + g.Expect(utils.EnvVarExists(utils.RAY_EXTERNAL_STORAGE_NS, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeTrue()) + if redisPassword == "" { + g.Expect(utils.EnvVarExists(utils.REDIS_PASSWORD, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeFalse()) + } else { + g.Expect(utils.EnvVarExists(utils.REDIS_PASSWORD, headPod.Spec.Containers[utils.RayContainerIndex].Env)).Should(BeTrue()) + } + + err = 
test.Client().Ray().RayV1().RayClusters(namespace.Name).Delete(test.Ctx(), rayCluster.Name, metav1.DeleteOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + }) + } +} diff --git a/ray-operator/test/e2e/raycluster_test.go b/ray-operator/test/e2e/raycluster_test.go new file mode 100644 index 00000000000..565f6355251 --- /dev/null +++ b/ray-operator/test/e2e/raycluster_test.go @@ -0,0 +1,123 @@ +package e2e + +import ( + "testing" + "time" + + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" + . "github.com/ray-project/kuberay/ray-operator/test/support" +) + +func TestRayClusterManagedBy(t *testing.T) { + test := With(t) + g := NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + test.T().Run("Successful creation of cluster, managed by Kuberay Operator", func(t *testing.T) { + t.Parallel() + + rayClusterAC := rayv1ac.RayCluster("raycluster-ok", namespace.Name). + WithSpec(newRayClusterSpec(). + WithManagedBy(utils.KubeRayController)) + + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("Waiting for RayCluster %s/%s to become ready", rayCluster.Namespace, rayCluster.Name) + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(RayClusterState, Equal(rayv1.Ready))) + }) + + test.T().Run("Creation of cluster skipped, managed by Kueue", func(t *testing.T) { + t.Parallel() + + rayClusterAC := rayv1ac.RayCluster("raycluster-skip", namespace.Name). 
+ WithSpec(newRayClusterSpec(). + WithManagedBy("kueue.x-k8s.io/multikueue")) + + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("RayCluster %s/%s will not become ready - not reconciled", rayCluster.Namespace, rayCluster.Name) + g.Consistently(func(gg Gomega) { + rc, err := RayCluster(test, rayCluster.Namespace, rayCluster.Name)() + gg.Expect(err).NotTo(HaveOccurred()) + gg.Expect(rc.Status.Conditions).To(BeEmpty()) + }, time.Second*3, time.Millisecond*500).Should(Succeed()) + + // Should not to be able to change managedBy field as it's immutable + rayClusterAC.Spec.WithManagedBy(utils.KubeRayController) + rayCluster, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).To(HaveOccurred()) + g.Eventually(RayCluster(test, *rayClusterAC.Namespace, *rayClusterAC.Name)). + Should(WithTransform(RayClusterManagedBy, Equal(ptr.To("kueue.x-k8s.io/multikueue")))) + }) + + test.T().Run("Failed creation of cluster, managed by external non supported controller", func(t *testing.T) { + t.Parallel() + + rayClusterAC := rayv1ac.RayCluster("raycluster-fail", namespace.Name). + WithSpec(newRayClusterSpec(). 
+ WithManagedBy("controller.com/not-supported")) + + _, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.IsInvalid(err)).To(BeTrue(), "error: %v", err) + }) +} + +func TestRayClusterSuspend(t *testing.T) { + test := With(t) + g := NewWithT(t) + // Create a namespace + namespace := test.NewTestNamespace() + + rayClusterAC := rayv1ac.RayCluster("raycluster-suspend", namespace.Name).WithSpec(newRayClusterSpec()) + + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("Waiting for RayCluster %s/%s to become ready", rayCluster.Namespace, rayCluster.Name) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) + + rayClusterAC = rayClusterAC.WithSpec(rayClusterAC.Spec.WithSuspend(true)) + rayCluster, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Suspend RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("Waiting for RayCluster %s/%s to be suspended", rayCluster.Namespace, rayCluster.Name) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
+ Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionTrue, string(rayv1.RayClusterSuspended)))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionFalse, rayv1.HeadPodNotFound))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionFalse, rayv1.RayClusterPodsProvisioning))) + + rayClusterAC = rayClusterAC.WithSpec(rayClusterAC.Spec.WithSuspend(false)) + rayCluster, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Resume RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("Waiting for RayCluster %s/%s to be resumed", rayCluster.Namespace, rayCluster.Name) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionFalse, string(rayv1.RayClusterSuspended)))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
+ Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) +} diff --git a/ray-operator/test/e2e/rayjob_cluster_selector_test.go b/ray-operator/test/e2e/rayjob_cluster_selector_test.go index 48c84016e47..410d11db6ab 100644 --- a/ray-operator/test/e2e/rayjob_cluster_selector_test.go +++ b/ray-operator/test/e2e/rayjob_cluster_selector_test.go @@ -2,8 +2,10 @@ package e2e import ( "testing" + "time" . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/errors" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" @@ -13,15 +15,15 @@ import ( func TestRayJobWithClusterSelector(t *testing.T) { test := With(t) + g := NewWithT(t) // Create a namespace namespace := test.NewTestNamespace() - test.StreamKubeRayOperatorLogs() // Job scripts - jobsAC := newConfigMap(namespace.Name, "jobs", files(test, "counter.py", "fail.py")) + jobsAC := newConfigMap(namespace.Name, files(test, "counter.py", "fail.py")) jobs, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), jobsAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created ConfigMap %s/%s successfully", jobs.Namespace, jobs.Name) // RayCluster @@ -29,11 +31,11 @@ func TestRayJobWithClusterSelector(t *testing.T) { WithSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs"))) rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) test.T().Logf("Waiting for RayCluster %s/%s to become ready", rayCluster.Namespace, rayCluster.Name) - test.Eventually(RayCluster(test, rayCluster.Namespace, 
rayCluster.Name), TestTimeoutMedium). + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). Should(WithTransform(RayClusterState, Equal(rayv1.Ready))) test.T().Run("Successful RayJob", func(t *testing.T) { @@ -51,15 +53,15 @@ env_vars: WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobStatus, Satisfy(rayv1.IsJobTerminal))) // Assert the Ray job has completed successfully - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusSucceeded))) }) @@ -75,15 +77,59 @@ env_vars: WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobStatus, Satisfy(rayv1.IsJobTerminal))) // Assert the Ray job has failed - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). 
+ g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusFailed))) }) + + test.T().Run("RayJob should be created but not to be updated when managed externally", func(t *testing.T) { + t.Parallel() + + // RayJob + rayJobAC := rayv1ac.RayJob("managed-externally", namespace.Name). + WithSpec(rayv1ac.RayJobSpec(). + WithClusterSelector(map[string]string{utils.RayClusterLabelKey: rayCluster.Name}). + WithEntrypoint("python /home/ray/jobs/counter.py"). + WithRuntimeEnvYAML(` +env_vars: + counter_name: test_counter +`). + WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration()). + WithManagedBy("kueue.x-k8s.io/multikueue")) + + rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + + // Assert the Ray job status has not been updated + g.Consistently(func(gg Gomega) { + rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name) + gg.Expect(err).ToNot(HaveOccurred()) + gg.Expect(rayJob.Status.JobDeploymentStatus).To(Equal(rayv1.JobDeploymentStatusNew)) + }, time.Second*3, time.Millisecond*500).Should(Succeed()) + }) + + test.T().Run("RayJob should not be created due to managedBy invalid value", func(_ *testing.T) { + // RayJob + rayJobAC := rayv1ac.RayJob("managed-externally-invalid", namespace.Name). + WithSpec(rayv1ac.RayJobSpec(). + WithClusterSelector(map[string]string{utils.RayClusterLabelKey: rayCluster.Name}). + WithEntrypoint("python /home/ray/jobs/counter.py"). + WithRuntimeEnvYAML(` +env_vars: + counter_name: test_counter +`). + WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration()).
+ WithManagedBy("invalid.com/controller")) + + _, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) + g.Expect(errors.IsInvalid(err)).To(BeTrue(), "error: %v", err) + }) } diff --git a/ray-operator/test/e2e/rayjob_lightweight_test.go b/ray-operator/test/e2e/rayjob_lightweight_test.go index c40798c1f08..3516dff4e06 100644 --- a/ray-operator/test/e2e/rayjob_lightweight_test.go +++ b/ray-operator/test/e2e/rayjob_lightweight_test.go @@ -5,10 +5,11 @@ import ( . "github.com/onsi/gomega" - k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" corev1ac "k8s.io/client-go/applyconfigurations/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" . "github.com/ray-project/kuberay/ray-operator/test/support" @@ -16,18 +17,18 @@ import ( func TestRayJobLightWeightMode(t *testing.T) { test := With(t) + g := NewWithT(t) // Create a namespace namespace := test.NewTestNamespace() - test.StreamKubeRayOperatorLogs() // Job scripts - jobsAC := newConfigMap(namespace.Name, "jobs", files(test, "counter.py", "fail.py", "stop.py")) + jobsAC := newConfigMap(namespace.Name, files(test, "counter.py", "fail.py", "stop.py")) jobs, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), jobsAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created ConfigMap %s/%s successfully", jobs.Namespace, jobs.Name) - test.T().Run("Successful RayJob", func(t *testing.T) { + test.T().Run("Successful RayJob", func(_ *testing.T) { rayJobAC := rayv1ac.RayJob("counter", namespace.Name). WithSpec(rayv1ac.RayJobSpec(). WithSubmissionMode(rayv1.HTTPMode). 
@@ -53,31 +54,32 @@ env_vars: mountConfigMap[corev1ac.PodTemplateSpecApplyConfiguration](jobs, "/home/ray/jobs")))))) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobStatus, Satisfy(rayv1.IsJobTerminal))) // Assert the RayJob has completed successfully - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusSucceeded))) // And the RayJob deployment status is updated accordingly - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name)). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name)). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusComplete))) // Refresh the RayJob status - rayJob = GetRayJob(test, rayJob.Namespace, rayJob.Name) + rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name) + g.Expect(err).NotTo(HaveOccurred()) // TODO (kevin85421): We may need to use `Eventually` instead if the assertion is flaky. 
// Assert the RayCluster has been torn down - _, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Get(test.Ctx(), rayJob.Status.RayClusterName, metav1.GetOptions{}) - test.Expect(err).To(MatchError(k8serrors.NewNotFound(rayv1.Resource("rayclusters"), rayJob.Status.RayClusterName))) + _, err = GetRayCluster(test, namespace.Name, rayJob.Status.RayClusterName) + g.Expect(k8serrors.IsNotFound(err)).To(BeTrue()) }) - test.T().Run("Failing RayJob without cluster shutdown after finished", func(t *testing.T) { + test.T().Run("Failing RayJob without cluster shutdown after finished", func(_ *testing.T) { rayJobAC := rayv1ac.RayJob("fail", namespace.Name). WithSpec(rayv1ac.RayJobSpec(). WithSubmissionMode(rayv1.HTTPMode). @@ -86,28 +88,28 @@ env_vars: WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs")))) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobStatus, Satisfy(rayv1.IsJobTerminal))) // Assert the Ray job has failed - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusFailed))) // Assert that the RayJob deployment status and RayJob reason have been updated accordingly. - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name)). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name)). 
Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusFailed))) - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobReason, Equal(rayv1.AppFailed))) // In the lightweight submission mode, the submitter Kubernetes Job should not be created. - test.Eventually(Jobs(test, namespace.Name)).Should(BeEmpty()) + g.Eventually(Jobs(test, namespace.Name)).Should(BeEmpty()) }) - test.T().Run("Should transition to 'Complete' if the Ray job has stopped.", func(t *testing.T) { + test.T().Run("Should transition to 'Complete' if the Ray job has stopped.", func(_ *testing.T) { // `stop.py` will sleep for 20 seconds so that the RayJob has enough time to transition to `RUNNING` // and then stop the Ray job. If the Ray job is stopped, the RayJob should transition to `Complete`. rayJobAC := rayv1ac.RayJob("stop", namespace.Name). @@ -117,24 +119,23 @@ env_vars: WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs")))) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to be 'Running'", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusRunning))) test.T().Logf("Waiting for RayJob %s/%s to be 'Complete'", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). 
Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusComplete))) // Refresh the RayJob status - rayJob = GetRayJob(test, rayJob.Namespace, rayJob.Name) - test.Expect(rayJob.Status.JobStatus).To(Equal(rayv1.JobStatusStopped)) + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)).To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusStopped))) // Delete the RayJob err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) }) } diff --git a/ray-operator/test/e2e/rayjob_recovery_test.go b/ray-operator/test/e2e/rayjob_recovery_test.go new file mode 100644 index 00000000000..f25e84c3ac4 --- /dev/null +++ b/ray-operator/test/e2e/rayjob_recovery_test.go @@ -0,0 +1,103 @@ +package e2e + +import ( + "fmt" + "testing" + "time" + + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" + . "github.com/ray-project/kuberay/ray-operator/test/support" +) + +func TestRayJobRecovery(t *testing.T) { + test := With(t) + g := NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Job scripts + jobsAC := newConfigMap(namespace.Name, files(test, "long_running_counter.py")) + jobs, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), jobsAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", jobs.Namespace, jobs.Name) + + test.T().Run("RayJob should recover after pod deletion", func(_ *testing.T) { + rayJobAC := rayv1ac.RayJob("counter", namespace.Name). + WithSpec(rayv1ac.RayJobSpec(). 
+ WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs"))). + WithEntrypoint("python /home/ray/jobs/long_running_counter.py"). + WithRuntimeEnvYAML(` +env_vars: + counter_name: test_counter +`). + WithShutdownAfterJobFinishes(true). + WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) + + rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + + test.T().Logf("Waiting for RayJob %s/%s to start running", rayJob.Namespace, rayJob.Name) + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + Should(WithTransform(RayJobStatus, Equal(rayv1.JobStatusRunning))) + test.T().Logf("Find RayJob %s/%s running", rayJob.Namespace, rayJob.Name) + // wait for the job to run a bit + test.T().Logf("Sleep RayJob %s/%s 15 seconds", rayJob.Namespace, rayJob.Name) + time.Sleep(15 * time.Second) + + // get the running jobpods + jobpods, err := test.Client().Core().CoreV1().Pods(namespace.Name).List(test.Ctx(), metav1.ListOptions{ + LabelSelector: fmt.Sprintf("job-name=%s", rayJob.Name), + }) + g.Expect(err).NotTo(HaveOccurred()) + + // remove the running jobpods + propagationPolicy := metav1.DeletePropagationBackground + for _, pod := range jobpods.Items { + test.T().Logf("Delete Pod %s from namespace %s", pod.Name, rayJob.Namespace) + err = test.Client().Core().CoreV1().Pods(namespace.Name).Delete(test.Ctx(), pod.Name, metav1.DeleteOptions{ + PropagationPolicy: &propagationPolicy, + }) + g.Expect(err).NotTo(HaveOccurred()) + } + + test.T().Logf("Waiting for new pod to be created and running for RayJob %s/%s", namespace.Name, rayJob.Name) + g.Eventually(func() ([]corev1.Pod, error) { + pods, err := test.Client().Core().CoreV1().Pods(namespace.Name).List( + test.Ctx(), + metav1.ListOptions{ + 
LabelSelector: fmt.Sprintf("job-name=%s", rayJob.Name), + }, + ) + g.Expect(err).NotTo(HaveOccurred()) + return pods.Items, nil + }, TestTimeoutMedium).Should( + WithTransform(func(pods []corev1.Pod) bool { + for _, pod := range pods { + isNew := pod.Status.Phase == corev1.PodRunning + for _, oldPod := range jobpods.Items { + // A pod that existed before the deletion is not a recovered pod. + isNew = isNew && pod.Name != oldPod.Name + } + if isNew { + test.T().Logf("Found new running pod %s/%s", pod.Namespace, pod.Name) + return true + } + } + return false + }, BeTrue()), + ) + + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + Should(WithTransform(RayJobStatus, Equal(rayv1.JobStatusSucceeded))) + + g.Eventually(RayJob(test, namespace.Name, rayJob.Name), TestTimeoutMedium). + Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusComplete))) + }) +} diff --git a/ray-operator/test/e2e/rayjob_retry_test.go b/ray-operator/test/e2e/rayjob_retry_test.go new file mode 100644 index 00000000000..fa3a24be7d6 --- /dev/null +++ b/ray-operator/test/e2e/rayjob_retry_test.go @@ -0,0 +1,210 @@ +package e2e + +import ( + "testing" + + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + k8serrors "k8s.io/apimachinery/pkg/api/errors" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" + .
"github.com/ray-project/kuberay/ray-operator/test/support" +) + +func TestRayJobRetry(t *testing.T) { + test := With(t) + g := NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Job scripts: mount every script that a subtest entrypoint below references + jobsAC := newConfigMap(namespace.Name, files(test, "fail.py", "long_running.py")) + jobs, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), jobsAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", jobs.Namespace, jobs.Name) + + test.T().Run("Failing RayJob without cluster shutdown after finished", func(_ *testing.T) { + // RayJob: Set RayJob.BackoffLimit to 2 + rayJobAC := rayv1ac.RayJob("fail", namespace.Name). + WithSpec(rayv1ac.RayJobSpec(). + WithBackoffLimit(2). + WithSubmitterConfig(rayv1ac.SubmitterConfig(). + WithBackoffLimit(0)). + WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs"))). + WithEntrypoint("python /home/ray/jobs/fail.py"). + WithShutdownAfterJobFinishes(false). + WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) + + rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + + test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) + + // Assert that the RayJob deployment status and RayJob reason have been updated accordingly. + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutLong). + Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusFailed))) + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusFailed))) + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)).
+ To(WithTransform(RayJobReason, Equal(rayv1.AppFailed))) + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusFailed))) + + // Check whether the controller respects the backoffLimit: 1 initial attempt + 2 retries = 3 failures. + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + Should(WithTransform(RayJobFailed, Equal(int32(3)))) + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + Should(WithTransform(RayJobSucceeded, Equal(int32(0)))) + + // Refresh the RayJob status + rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name) + g.Expect(err).NotTo(HaveOccurred()) + + // Delete the RayJob + err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + + // Assert the RayCluster has been cascade deleted + g.Eventually(func() error { + _, err := GetRayCluster(test, namespace.Name, rayJob.Status.RayClusterName) + return err + }).Should(WithTransform(k8serrors.IsNotFound, BeTrue())) + + // Assert the submitter Job has been cascade deleted + g.Eventually(Jobs(test, namespace.Name)).Should(BeEmpty()) + }) + + test.T().Run("Failing submitter K8s Job", func(_ *testing.T) { + // RayJob: Set RayJob.BackoffLimit to 2 & SubmitterConfig.BackoffLimit to 0 to test RayJob level backoffLimit + rayJobAC := rayv1ac.RayJob("fail-submitter-k8s-job", namespace.Name). + WithSpec(rayv1ac.RayJobSpec(). + WithBackoffLimit(2). + WithSubmitterConfig(rayv1ac.SubmitterConfig(). + WithBackoffLimit(0)). + WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs"))). + WithEntrypoint("The command will be overridden by the submitter Job"). + WithShutdownAfterJobFinishes(true).
+ WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) + + // In this test, we try to simulate the case where the submitter Job can't connect to the RayCluster successfully. + // Hence, KubeRay can't get the Ray job information from the RayCluster. When the RayJob reaches the backoff + // limit, it will be marked as failed. Then, the RayJob should transition to `Failed`. + rayJobAC.Spec.SubmitterPodTemplate.Spec.Containers[0].WithCommand("ray", "job", "submit", "--address", "http://do-not-exist:8265", "--", "echo 123") + + rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) + + // Ensure JobDeploymentStatus transitions to Failed + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusFailed))) + // Ensure JobStatus is empty + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusNew))) + // Ensure Reason is SubmissionFailed + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + To(WithTransform(RayJobReason, Equal(rayv1.SubmissionFailed))) + + // Check whether the controller respects the backoffLimit. + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + Should(WithTransform(RayJobFailed, Equal(int32(3)))) + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + Should(WithTransform(RayJobSucceeded, Equal(int32(0)))) + + // Refresh the RayJob status + rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name) + g.Expect(err).NotTo(HaveOccurred()) + + // Assert the RayCluster has been deleted because ShutdownAfterJobFinishes is true.
+ g.Eventually(func() error { + _, err := GetRayCluster(test, namespace.Name, rayJob.Status.RayClusterName) + return err + }, TestTimeoutMedium).Should(WithTransform(k8serrors.IsNotFound, BeTrue())) + // Assert the submitter Job is not deleted yet + g.Eventually(Jobs(test, namespace.Name)).ShouldNot(BeEmpty()) + + // Delete the RayJob + err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + }) + + test.T().Run("RayJob has passed ActiveDeadlineSeconds", func(_ *testing.T) { + // RayJob will transition to JobDeploymentStatusFailed + // regardless of the value of backoffLimit. + rayJobAC := rayv1ac.RayJob("long-running", namespace.Name). + WithSpec(rayv1ac.RayJobSpec(). + WithBackoffLimit(2). + WithSubmitterConfig(rayv1ac.SubmitterConfig(). + WithBackoffLimit(0)). + WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs"))). + WithEntrypoint("python /home/ray/jobs/long_running.py"). + WithShutdownAfterJobFinishes(true). + WithTTLSecondsAfterFinished(600). + WithActiveDeadlineSeconds(5). + WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) + + rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + + // The RayJob will transition to `Failed` because it has passed `ActiveDeadlineSeconds`. + test.T().Logf("Waiting for RayJob %s/%s to be 'Failed'", rayJob.Namespace, rayJob.Name) + + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutShort). + Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusFailed))) + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)).
+ To(WithTransform(RayJobReason, Equal(rayv1.DeadlineExceeded))) + + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + Should(WithTransform(RayJobFailed, Equal(int32(1)))) + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + Should(WithTransform(RayJobSucceeded, Equal(int32(0)))) + }) + + test.T().Run("Failing RayJob with HttpMode submission mode", func(_ *testing.T) { + // Set up the RayJob with HTTP mode and a BackoffLimit + rayJobAC := rayv1ac.RayJob("failing-rayjob-in-httpmode", namespace.Name). + WithSpec(rayv1ac.RayJobSpec(). + WithSubmissionMode(rayv1.HTTPMode). + WithBackoffLimit(2). + WithEntrypoint("python /home/ray/jobs/fail.py"). + WithShutdownAfterJobFinishes(false). + WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs")))) + + rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + + test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) + + // Assert that the RayJob deployment status has been updated. + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusFailed))) + + // Assert the Ray job has failed. + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusFailed))) + + // Check the RayJob reason has been updated. + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + To(WithTransform(RayJobReason, Equal(rayv1.AppFailed))) + + // Check whether the controller respects the backoffLimit. + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)).
+ Should(WithTransform(RayJobFailed, Equal(int32(3)))) // 2 retries + 1 initial attempt = 3 failures + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + Should(WithTransform(RayJobSucceeded, Equal(int32(0)))) + + // Clean up + err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + }) +} diff --git a/ray-operator/test/e2e/rayjob_suspend_test.go b/ray-operator/test/e2e/rayjob_suspend_test.go index b116cbd1d7e..2292be42d68 100644 --- a/ray-operator/test/e2e/rayjob_suspend_test.go +++ b/ray-operator/test/e2e/rayjob_suspend_test.go @@ -5,9 +5,10 @@ import ( . "github.com/onsi/gomega" - k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" @@ -16,18 +17,18 @@ import ( func TestRayJobSuspend(t *testing.T) { test := With(t) + g := NewWithT(t) // Create a namespace namespace := test.NewTestNamespace() - test.StreamKubeRayOperatorLogs() // Job scripts - jobsAC := newConfigMap(namespace.Name, "jobs", files(test, "long_running.py", "counter.py")) + jobsAC := newConfigMap(namespace.Name, files(test, "long_running.py", "counter.py")) jobs, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), jobsAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created ConfigMap %s/%s successfully", jobs.Namespace, jobs.Name) - test.T().Run("Suspend the RayJob when its status is 'Running', and then resume it.", func(t *testing.T) { + test.T().Run("Suspend the RayJob when its status is 'Running', and then
resume it.", func(_ *testing.T) { // RayJob rayJobAC := rayv1ac.RayJob("long-running", namespace.Name). WithSpec(rayv1ac.RayJobSpec(). @@ -38,29 +39,29 @@ func TestRayJobSuspend(t *testing.T) { WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to be 'Running'", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusRunning))) test.T().Logf("Suspend the RayJob %s/%s", rayJob.Namespace, rayJob.Name) rayJobAC.Spec.WithSuspend(true) rayJob, err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Waiting for RayJob %s/%s to be 'Suspended'", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusSuspended))) // TODO (kevin85421): We may need to use `Eventually` instead if the assertion is flaky. 
// Assert the RayCluster has been torn down - _, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Get(test.Ctx(), rayJob.Status.RayClusterName, metav1.GetOptions{}) - test.Expect(err).To(MatchError(k8serrors.NewNotFound(rayv1.Resource("rayclusters"), rayJob.Status.RayClusterName))) + _, err = GetRayCluster(test, namespace.Name, rayJob.Status.RayClusterName) + g.Expect(k8serrors.IsNotFound(err)).To(BeTrue()) // Assert the submitter Job has been cascade deleted - test.Eventually(Jobs(test, namespace.Name)).Should(BeEmpty()) + g.Eventually(Jobs(test, namespace.Name)).Should(BeEmpty()) // TODO (kevin85421): Check whether the Pods associated with the RayCluster and the submitter Job have been deleted. // For Kubernetes Jobs, the default deletion behavior is "orphanDependents," which means the Pods will not be @@ -69,17 +70,17 @@ func TestRayJobSuspend(t *testing.T) { test.T().Logf("Resume the RayJob by updating `suspend` to false.") rayJobAC.Spec.WithSuspend(false) rayJob, err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Expect(err).NotTo(HaveOccurred()) + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusRunning))) // Delete the RayJob err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) }) - test.T().Run("Create a suspended RayJob, and then resume it.", func(t *testing.T) { + test.T().Run("Create a suspended RayJob, and then resume it.", func(_ *testing.T) { // RayJob rayJobAC := rayv1ac.RayJob("counter", namespace.Name). WithSpec(rayv1ac.RayJobSpec(). 
@@ -94,40 +95,43 @@ env_vars: WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs")))) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to be 'Suspended'", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusSuspended))) test.T().Logf("Resume the RayJob by updating `suspend` to false.") rayJobAC.Spec.WithSuspend(false) rayJob, err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusComplete))) // Assert the RayJob has completed successfully - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). 
To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusSucceeded))) // Refresh the RayJob status - rayJob = GetRayJob(test, rayJob.Namespace, rayJob.Name) + rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name) + g.Expect(err).NotTo(HaveOccurred()) // Delete the RayJob err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) // Assert the RayCluster has been cascade deleted - test.Eventually(NotFound(RayClusterOrError(test, rayJob.Namespace, rayJob.Status.RayClusterName))). - Should(BeTrue()) + g.Eventually(func() error { + _, err := GetRayCluster(test, namespace.Name, rayJob.Status.RayClusterName) + return err + }).Should(WithTransform(k8serrors.IsNotFound, BeTrue())) // Assert the Pods has been cascade deleted - test.Eventually(Pods(test, namespace.Name, + g.Eventually(Pods(test, namespace.Name, LabelSelector(utils.RayClusterLabelKey+"="+rayJob.Status.RayClusterName))). Should(BeEmpty()) }) diff --git a/ray-operator/test/e2e/rayjob_test.go b/ray-operator/test/e2e/rayjob_test.go index 960d49b9f4d..866eaaafb04 100644 --- a/ray-operator/test/e2e/rayjob_test.go +++ b/ray-operator/test/e2e/rayjob_test.go @@ -5,28 +5,31 @@ import ( "time" . "github.com/onsi/gomega" - k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + + k8serrors "k8s.io/apimachinery/pkg/api/errors" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" . 
"github.com/ray-project/kuberay/ray-operator/test/support" ) func TestRayJob(t *testing.T) { test := With(t) + g := NewWithT(t) // Create a namespace namespace := test.NewTestNamespace() - test.StreamKubeRayOperatorLogs() // Job scripts - jobsAC := newConfigMap(namespace.Name, "jobs", files(test, "counter.py", "fail.py", "stop.py", "long_running.py")) + jobsAC := newConfigMap(namespace.Name, files(test, "counter.py", "fail.py", "stop.py", "long_running.py")) jobs, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), jobsAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created ConfigMap %s/%s successfully", jobs.Namespace, jobs.Name) - test.T().Run("Successful RayJob", func(t *testing.T) { + test.T().Run("Successful RayJob", func(_ *testing.T) { // RayJob rayJobAC := rayv1ac.RayJob("counter", namespace.Name). WithSpec(rayv1ac.RayJobSpec(). @@ -40,31 +43,32 @@ env_vars: WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobStatus, Satisfy(rayv1.IsJobTerminal))) // Assert the RayJob has completed successfully - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusSucceeded))) // And the RayJob deployment status is updated accordingly - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name)). 
+ g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name)). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusComplete))) // Refresh the RayJob status - rayJob = GetRayJob(test, rayJob.Namespace, rayJob.Name) + rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name) + g.Expect(err).NotTo(HaveOccurred()) // TODO (kevin85421): We may need to use `Eventually` instead if the assertion is flaky. // Assert the RayCluster has been torn down - _, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Get(test.Ctx(), rayJob.Status.RayClusterName, metav1.GetOptions{}) - test.Expect(err).To(MatchError(k8serrors.NewNotFound(rayv1.Resource("rayclusters"), rayJob.Status.RayClusterName))) + _, err = GetRayCluster(test, namespace.Name, rayJob.Status.RayClusterName) + g.Expect(k8serrors.IsNotFound(err)).To(BeTrue()) // Assert the submitter Job has not been deleted - test.Eventually(Jobs(test, namespace.Name)).ShouldNot(BeEmpty()) + g.Eventually(Jobs(test, namespace.Name)).ShouldNot(BeEmpty()) // TODO (kevin85421): Check whether the Pods associated with the RayCluster and the submitter Job have been deleted. // For Kubernetes Jobs, the default deletion behavior is "orphanDependents," which means the Pods will not be @@ -73,17 +77,17 @@ env_vars: test.T().Logf("Update `suspend` to true. However, since the RayJob is completed, the status should not be updated to `Suspended`.") rayJobAC.Spec.WithSuspend(true) rayJob, err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) - test.Consistently(RayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(err).NotTo(HaveOccurred()) + g.Consistently(RayJob(test, rayJob.Namespace, rayJob.Name)). 
Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusComplete))) // Delete the RayJob err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) }) - test.T().Run("Failing RayJob without cluster shutdown after finished", func(t *testing.T) { + test.T().Run("Failing RayJob without cluster shutdown after finished", func(_ *testing.T) { // RayJob rayJobAC := rayv1ac.RayJob("fail", namespace.Name). WithSpec(rayv1ac.RayJobSpec(). @@ -93,42 +97,45 @@ env_vars: WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobStatus, Satisfy(rayv1.IsJobTerminal))) // Assert the Ray job has failed - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusFailed))) // Assert that the RayJob deployment status and RayJob reason have been updated accordingly. - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name)). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name)). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusFailed))) - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). 
+ g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobReason, Equal(rayv1.AppFailed))) // TODO (kevin85421): Ensure the RayCluster and Kubernetes Job are not deleted because `ShutdownAfterJobFinishes` is false. // Refresh the RayJob status - rayJob = GetRayJob(test, rayJob.Namespace, rayJob.Name) + rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name) + g.Expect(err).NotTo(HaveOccurred()) // Delete the RayJob err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) // Assert the RayCluster has been cascade deleted - test.Eventually(NotFound(RayClusterOrError(test, namespace.Name, rayJob.Status.RayClusterName))). - Should(BeTrue()) + g.Eventually(func() error { + _, err := GetRayCluster(test, namespace.Name, rayJob.Status.RayClusterName) + return err + }).Should(WithTransform(k8serrors.IsNotFound, BeTrue())) // Assert the submitter Job has been cascade deleted - test.Eventually(Jobs(test, namespace.Name)).Should(BeEmpty()) + g.Eventually(Jobs(test, namespace.Name)).Should(BeEmpty()) }) - test.T().Run("Failing submitter K8s Job", func(t *testing.T) { + test.T().Run("Failing submitter K8s Job", func(_ *testing.T) { // RayJob rayJobAC := rayv1ac.RayJob("fail-k8s-job", namespace.Name). WithSpec(rayv1ac.RayJobSpec(). 
@@ -143,33 +150,36 @@ env_vars: rayJobAC.Spec.SubmitterPodTemplate.Spec.Containers[0].WithCommand("ray", "job", "submit", "--address", "http://do-not-exist:8265", "--", "echo 123") rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusFailed))) - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusNew))) - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobReason, Equal(rayv1.SubmissionFailed))) // Refresh the RayJob status - rayJob = GetRayJob(test, rayJob.Namespace, rayJob.Name) + rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name) + g.Expect(err).NotTo(HaveOccurred()) // Assert the RayCluster has been deleted because ShutdownAfterJobFinishes is true. - test.Eventually(NotFound(RayClusterOrError(test, namespace.Name, rayJob.Status.RayClusterName)), TestTimeoutMedium). 
- Should(BeTrue()) + g.Eventually(func() error { + _, err := GetRayCluster(test, namespace.Name, rayJob.Status.RayClusterName) + return err + }, TestTimeoutMedium).Should(WithTransform(k8serrors.IsNotFound, BeTrue())) // Asset submitter Job is not deleted yet - test.Eventually(Jobs(test, namespace.Name)).ShouldNot(BeEmpty()) + g.Eventually(Jobs(test, namespace.Name)).ShouldNot(BeEmpty()) // Delete the RayJob err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) }) - test.T().Run("Should transition to 'Complete' if the Ray job has stopped.", func(t *testing.T) { + test.T().Run("Should transition to 'Complete' if the Ray job has stopped.", func(_ *testing.T) { // `stop.py` will sleep for 20 seconds so that the RayJob has enough time to transition to `RUNNING` // and then stop the Ray job. If the Ray job is stopped, the RayJob should transition to `Complete`. rayJobAC := rayv1ac.RayJob("stop", namespace.Name). @@ -179,28 +189,27 @@ env_vars: WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs")))) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) test.T().Logf("Waiting for RayJob %s/%s to be 'Running'", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium).
Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusRunning))) test.T().Logf("Waiting for RayJob %s/%s to be 'Complete'", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutMedium). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusComplete))) // Refresh the RayJob status - rayJob = GetRayJob(test, rayJob.Namespace, rayJob.Name) - test.Expect(rayJob.Status.JobStatus).To(Equal(rayv1.JobStatusStopped)) + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)).To(WithTransform(RayJobStatus, Equal(rayv1.JobStatusStopped))) // Delete the RayJob err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), rayJob.Name, metav1.DeleteOptions{}) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Deleted RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) }) - test.T().Run("RuntimeEnvYAML is not a valid YAML string", func(t *testing.T) { + test.T().Run("RuntimeEnvYAML is not a valid YAML string", func(_ *testing.T) { rayJobAC := rayv1ac.RayJob("invalid-yamlstr", namespace.Name). WithSpec(rayv1ac.RayJobSpec(). WithEntrypoint("python /home/ray/jobs/counter.py"). @@ -208,15 +217,15 @@ env_vars: WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs")))) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) // `RuntimeEnvYAML` is not a valid YAML string, so the RayJob controller will not do anything with the CR. - test.Consistently(RayJob(test, rayJob.Namespace, rayJob.Name), 5*time.Second). 
+ g.Consistently(RayJob(test, rayJob.Namespace, rayJob.Name), 5*time.Second). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusNew))) }) - test.T().Run("RayJob has passed ActiveDeadlineSeconds", func(t *testing.T) { + test.T().Run("RayJob has passed ActiveDeadlineSeconds", func(_ *testing.T) { rayJobAC := rayv1ac.RayJob("long-running", namespace.Name). WithSpec(rayv1ac.RayJobSpec(). WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs"))). @@ -227,14 +236,59 @@ env_vars: WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration())) rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) - test.Expect(err).NotTo(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) // The RayJob will transition to `Complete` because it has passed `ActiveDeadlineSeconds`. test.T().Logf("Waiting for RayJob %s/%s to be 'Complete'", rayJob.Namespace, rayJob.Name) - test.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutShort). + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutShort). Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusFailed))) - test.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). + g.Expect(GetRayJob(test, rayJob.Namespace, rayJob.Name)). To(WithTransform(RayJobReason, Equal(rayv1.DeadlineExceeded))) }) + + test.T().Run("RayJob should be created, but not updated when managed externally", func(_ *testing.T) { + // RayJob + rayJobAC := rayv1ac.RayJob("managed-externally", namespace.Name). + WithSpec(rayv1ac.RayJobSpec(). + WithRayClusterSpec(newRayClusterSpec(mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](jobs, "/home/ray/jobs"))). + WithEntrypoint("python /home/ray/jobs/counter.py"). + WithRuntimeEnvYAML(` +env_vars: + counter_name: test_counter +`). 
+ WithShutdownAfterJobFinishes(true). + WithSubmitterPodTemplate(jobSubmitterPodTemplateApplyConfiguration()). + WithManagedBy("kueue.x-k8s.io/multikueue")) + + rayJob, err := test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) + + // Should not to be able to change managedBy field as it's immutable + rayJobAC.Spec.WithManagedBy(utils.KubeRayController) + _, err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Apply(test.Ctx(), rayJobAC, TestApplyOptions) + g.Expect(err).To(HaveOccurred()) + g.Eventually(RayJob(test, *rayJobAC.Namespace, *rayJobAC.Name)). + Should(WithTransform(RayJobManagedBy, Equal(ptr.To("kueue.x-k8s.io/multikueue")))) + + // Refresh the RayJob status and assert it has not been updated + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name)). + Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusNew))) + + // Assert the associated RayCluster has not beed created + rcList, err := test.Client().Ray().RayV1().RayClusters(rayJob.Namespace).List(test.Ctx(), metav1.ListOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + for _, rc := range rcList.Items { + g.Expect(rc.Name).NotTo(HaveSuffix(*rayJobAC.Name)) + } + + // Assert the submitter Job has not been created + g.Eventually(Jobs(test, namespace.Name)).Should(BeEmpty()) + + // Delete the RayJob + err = test.Client().Ray().RayV1().RayJobs(namespace.Name).Delete(test.Ctx(), *rayJobAC.Name, metav1.DeleteOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Deleted RayJob %s/%s successfully", *rayJobAC.Namespace, *rayJobAC.Name) + }) } diff --git a/ray-operator/test/e2e/support.go b/ray-operator/test/e2e/support.go index 0c5ec8bf9e9..2dec0d953df 100644 --- a/ray-operator/test/e2e/support.go +++ b/ray-operator/test/e2e/support.go @@ -2,9 +2,9 @@ package e2e import ( "embed" + "strings" - 
"github.com/onsi/gomega" - + "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" corev1ac "k8s.io/client-go/applyconfigurations/core/v1" @@ -19,7 +19,7 @@ var _files embed.FS func ReadFile(t Test, fileName string) []byte { t.T().Helper() file, err := _files.ReadFile(fileName) - t.Expect(err).NotTo(gomega.HaveOccurred()) + assert.NoError(t.T(), err) return file } @@ -41,8 +41,8 @@ func options[T any](options ...option[T]) option[T] { } } -func newConfigMap(namespace, name string, options ...option[corev1ac.ConfigMapApplyConfiguration]) *corev1ac.ConfigMapApplyConfiguration { - cmAC := corev1ac.ConfigMap(name, namespace). +func newConfigMap(namespace string, options ...option[corev1ac.ConfigMapApplyConfiguration]) *corev1ac.ConfigMapApplyConfiguration { + cmAC := corev1ac.ConfigMap("jobs", namespace). WithBinaryData(map[string][]byte{}). WithImmutable(true) @@ -126,13 +126,10 @@ func headPodTemplateApplyConfiguration() *corev1ac.PodTemplateSpecApplyConfigura WithImage(GetRayImage()). WithPorts( corev1ac.ContainerPort().WithName("gcs").WithContainerPort(6379), + corev1ac.ContainerPort().WithName("serve").WithContainerPort(8000), corev1ac.ContainerPort().WithName("dashboard").WithContainerPort(8265), corev1ac.ContainerPort().WithName("client").WithContainerPort(10001), ). - WithLifecycle(corev1ac.Lifecycle(). - WithPreStop(corev1ac.LifecycleHandler(). - WithExec(corev1ac.ExecAction(). - WithCommand("/bin/sh", "-c", "ray stop")))). WithResources(corev1ac.ResourceRequirements(). WithRequests(corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("300m"), @@ -150,10 +147,6 @@ func workerPodTemplateApplyConfiguration() *corev1ac.PodTemplateSpecApplyConfigu WithContainers(corev1ac.Container(). WithName("ray-worker"). WithImage(GetRayImage()). - WithLifecycle(corev1ac.Lifecycle(). - WithPreStop(corev1ac.LifecycleHandler(). - WithExec(corev1ac.ExecAction(). - WithCommand("/bin/sh", "-c", "ray stop")))). 
WithResources(corev1ac.ResourceRequirements(). WithRequests(corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("300m"), @@ -182,3 +175,40 @@ func jobSubmitterPodTemplateApplyConfiguration() *corev1ac.PodTemplateSpecApplyC corev1.ResourceMemory: resource.MustParse("500Mi"), })))) } + +func deployRedis(t Test, namespace string, password string) func() string { + redisContainer := corev1ac.Container().WithName("redis").WithImage("redis:7.4"). + WithPorts(corev1ac.ContainerPort().WithContainerPort(6379)) + dbSizeCmd := []string{"redis-cli", "--no-auth-warning", "DBSIZE"} + if password != "" { + redisContainer.WithCommand("redis-server", "--requirepass", password) + dbSizeCmd = []string{"redis-cli", "--no-auth-warning", "-a", password, "DBSIZE"} + } + + pod, err := t.Client().Core().CoreV1().Pods(namespace).Apply( + t.Ctx(), + corev1ac.Pod("redis", namespace). + WithLabels(map[string]string{"app": "redis"}). + WithSpec(corev1ac.PodSpec().WithContainers(redisContainer)), + TestApplyOptions, + ) + assert.NoError(t.T(), err) + + _, err = t.Client().Core().CoreV1().Services(namespace).Apply( + t.Ctx(), + corev1ac.Service("redis", namespace). + WithSpec(corev1ac.ServiceSpec(). + WithSelector(map[string]string{"app": "redis"}). + WithPorts(corev1ac.ServicePort(). 
+ WithPort(6379), + ), + ), + TestApplyOptions, + ) + assert.NoError(t.T(), err) + + return func() string { + stdout, stderr := ExecPodCmd(t, pod, "redis", dbSizeCmd) + return strings.TrimSpace(stdout.String() + stderr.String()) + } +} diff --git a/ray-operator/test/e2eautoscaler/create_concurrent_tasks.py b/ray-operator/test/e2eautoscaler/create_concurrent_tasks.py new file mode 100644 index 00000000000..98b861c140e --- /dev/null +++ b/ray-operator/test/e2eautoscaler/create_concurrent_tasks.py @@ -0,0 +1,19 @@ +"""This script creates a number of tasks at roughly the same time, and waits for their completion.""" + +import ray +import time +import random + +# The task number should be large enough, so the autoscaler is triggered to scale to max replica. +_TASK_NUM = 30 +# The min task duration should be long enough, which passes the autoscaling stage of the test. +_TASK_MIN_DUR_SEC = 5 +# The max task duration should be reasonable to have a cap on overall test duration. +_TASK_MAX_DUR_SEC = 10 + +@ray.remote(num_cpus=1) +def f(): + sleep_time_sec = random.randint(_TASK_MIN_DUR_SEC, _TASK_MAX_DUR_SEC) + time.sleep(sleep_time_sec) + +ray.get([f.remote() for _ in range(_TASK_NUM)]) diff --git a/ray-operator/test/e2eautoscaler/create_detached_actor.py b/ray-operator/test/e2eautoscaler/create_detached_actor.py new file mode 100644 index 00000000000..9a5ab968798 --- /dev/null +++ b/ray-operator/test/e2eautoscaler/create_detached_actor.py @@ -0,0 +1,18 @@ +import ray +import sys +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('name') +parser.add_argument('--num-cpus', type=float, default=1) +parser.add_argument('--num-gpus', type=float, default=0) +parser.add_argument('--num-custom-resources', type=float, default=0) +args = parser.parse_args() + +@ray.remote(num_cpus=args.num_cpus, num_gpus=args.num_gpus, resources={"CustomResource": args.num_custom_resources}) +class Actor: + pass + + +ray.init(namespace="default_namespace")
+Actor.options(name=args.name, lifetime="detached").remote() diff --git a/ray-operator/test/e2eautoscaler/raycluster_autoscaler_test.go b/ray-operator/test/e2eautoscaler/raycluster_autoscaler_test.go new file mode 100644 index 00000000000..403a380d5b5 --- /dev/null +++ b/ray-operator/test/e2eautoscaler/raycluster_autoscaler_test.go @@ -0,0 +1,357 @@ +package e2eautoscaler + +import ( + "fmt" + "testing" + + "github.com/onsi/gomega" + corev1ac "k8s.io/client-go/applyconfigurations/core/v1" + "k8s.io/utils/ptr" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" + rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" + . "github.com/ray-project/kuberay/ray-operator/test/support" +) + +var tests = map[string]struct { + HeadPodTemplateGetter func() *corev1ac.PodTemplateSpecApplyConfiguration + WorkerPodTemplateGetter func() *corev1ac.PodTemplateSpecApplyConfiguration +}{ + "Create a RayCluster with autoscaling enabled": { + HeadPodTemplateGetter: headPodTemplateApplyConfiguration, + WorkerPodTemplateGetter: workerPodTemplateApplyConfiguration, + }, + "Create a RayCluster with autoscaler v2 enabled": { + HeadPodTemplateGetter: headPodTemplateApplyConfigurationV2, + WorkerPodTemplateGetter: workerPodTemplateApplyConfigurationV2, + }, +} + +func TestRayClusterAutoscaler(t *testing.T) { + for name, tc := range tests { + test := With(t) + g := gomega.NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Scripts for creating and terminating detached actors to trigger autoscaling + scriptsAC := newConfigMap(namespace.Name, files(test, "create_detached_actor.py", "terminate_detached_actor.py")) + scripts, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), scriptsAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + 
test.T().Logf("Created ConfigMap %s/%s successfully", scripts.Namespace, scripts.Name) + + test.T().Run(name, func(_ *testing.T) { + rayClusterSpecAC := rayv1ac.RayClusterSpec(). + WithEnableInTreeAutoscaling(true). + WithRayVersion(GetRayVersion()). + WithHeadGroupSpec(rayv1ac.HeadGroupSpec(). + WithRayStartParams(map[string]string{"num-cpus": "0"}). + WithTemplate(tc.HeadPodTemplateGetter())). + WithWorkerGroupSpecs(rayv1ac.WorkerGroupSpec(). + WithReplicas(0). + WithMinReplicas(0). + WithMaxReplicas(3). + WithGroupName("small-group"). + WithRayStartParams(map[string]string{"num-cpus": "1"}). + WithTemplate(tc.WorkerPodTemplateGetter())) + rayClusterAC := rayv1ac.RayCluster("ray-cluster", namespace.Name). + WithSpec(apply(rayClusterSpecAC, mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](scripts, "/home/ray/test_scripts"))) + + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + // Wait for RayCluster to become ready and verify the number of available worker replicas. + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterState, gomega.Equal(rayv1.Ready))) + g.Expect(GetRayCluster(test, rayCluster.Namespace, rayCluster.Name)).To(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0)))) + + headPod, err := GetHeadPod(test, rayCluster) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name) + + // Create a detached actor, and a worker should be created. + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/create_detached_actor.py", "actor1"}) + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). 
+ Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(1)))) + + // Create a detached actor, and a worker should be created. + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/create_detached_actor.py", "actor2"}) + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(2)))) + + // Terminate a detached actor, and a worker should be deleted. + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/terminate_detached_actor.py", "actor1"}) + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(1)))) + + // Terminate a detached actor, and a worker should be deleted. + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/terminate_detached_actor.py", "actor2"}) + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0)))) + }) + } +} + +func TestRayClusterAutoscalerWithFakeGPU(t *testing.T) { + for name, tc := range tests { + + test := With(t) + g := gomega.NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Scripts for creating and terminating detached actors to trigger autoscaling + scriptsAC := newConfigMap(namespace.Name, files(test, "create_detached_actor.py", "terminate_detached_actor.py")) + scripts, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), scriptsAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", scripts.Namespace, scripts.Name) + + test.T().Run(name, func(_ *testing.T) { + rayClusterSpecAC := rayv1ac.RayClusterSpec(). 
+ WithEnableInTreeAutoscaling(true). + WithRayVersion(GetRayVersion()). + WithHeadGroupSpec(rayv1ac.HeadGroupSpec(). + WithRayStartParams(map[string]string{"num-cpus": "0"}). + WithTemplate(tc.HeadPodTemplateGetter())). + WithWorkerGroupSpecs(rayv1ac.WorkerGroupSpec(). + WithReplicas(0). + WithMinReplicas(0). + WithMaxReplicas(3). + WithGroupName("gpu-group"). + WithRayStartParams(map[string]string{"num-cpus": "1", "num-gpus": "1"}). + WithTemplate(tc.WorkerPodTemplateGetter())) + rayClusterAC := rayv1ac.RayCluster("ray-cluster", namespace.Name). + WithSpec(apply(rayClusterSpecAC, mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](scripts, "/home/ray/test_scripts"))) + + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + // Wait for RayCluster to become ready and verify the number of available worker replicas. + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterState, gomega.Equal(rayv1.Ready))) + g.Expect(GetRayCluster(test, rayCluster.Namespace, rayCluster.Name)).To(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0)))) + + headPod, err := GetHeadPod(test, rayCluster) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name) + + // Create a detached gpu actor, and a worker in the "gpu-group" should be created. + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/create_detached_actor.py", "gpu_actor", "--num-gpus=1"}) + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). 
+ Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(1)))) + // We don't use real GPU resources of Kubernetes here, therefore we can't test the RayClusterDesiredGPU. + // We test the Pods count of the "gpu-group" instead. + g.Expect(GetGroupPods(test, rayCluster, "gpu-group")).To(gomega.HaveLen(1)) + + // Terminate the gpu detached actor, and the worker should be deleted. + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/terminate_detached_actor.py", "gpu_actor"}) + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0)))) + }) + } +} + +func TestRayClusterAutoscalerWithCustomResource(t *testing.T) { + for name, tc := range tests { + + test := With(t) + g := gomega.NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Scripts for creating and terminating detached actors to trigger autoscaling + scriptsAC := newConfigMap(namespace.Name, files(test, "create_detached_actor.py", "terminate_detached_actor.py")) + scripts, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), scriptsAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", scripts.Namespace, scripts.Name) + + test.T().Run(name, func(_ *testing.T) { + groupName := "custom-resource-group" + + rayClusterSpecAC := rayv1ac.RayClusterSpec(). + WithEnableInTreeAutoscaling(true). + WithRayVersion(GetRayVersion()). + WithHeadGroupSpec(rayv1ac.HeadGroupSpec(). + WithRayStartParams(map[string]string{"num-cpus": "0"}). + WithTemplate(tc.HeadPodTemplateGetter())). + WithWorkerGroupSpecs(rayv1ac.WorkerGroupSpec(). + WithReplicas(0). + WithMinReplicas(0). + WithMaxReplicas(3). + WithGroupName(groupName). + WithRayStartParams(map[string]string{"num-cpus": "1", "resources": `'{"CustomResource": 1}'`}). 
+ WithTemplate(tc.WorkerPodTemplateGetter())) + rayClusterAC := rayv1ac.RayCluster("ray-cluster", namespace.Name). + WithSpec(apply(rayClusterSpecAC, mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](scripts, "/home/ray/test_scripts"))) + + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + // Wait for RayCluster to become ready and verify the number of available worker replicas. + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterState, gomega.Equal(rayv1.Ready))) + g.Expect(GetRayCluster(test, rayCluster.Namespace, rayCluster.Name)).To(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0)))) + + headPod, err := GetHeadPod(test, rayCluster) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name) + + // Create a detached custom resource actor, and a worker in the "custom-resource-group" should be created. + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/create_detached_actor.py", "custom_resource_actor", "--num-custom-resources=1"}) + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(1)))) + g.Expect(GetGroupPods(test, rayCluster, groupName)).To(gomega.HaveLen(1)) + + // Terminate the custom resource detached actor, and the worker should be deleted. + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/terminate_detached_actor.py", "custom_resource_actor"}) + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). 
+ Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0)))) + }) + } +} + +func TestRayClusterAutoscalerWithDesiredState(t *testing.T) { + for name, tc := range tests { + + test := With(t) + g := gomega.NewWithT(t) + + const maxReplica = 3 + // Set the scale down window to a large enough value, so scale down could be disabled to avoid test flakiness. + const scaleDownWaitSec = 3600 + + // Create a namespace + namespace := test.NewTestNamespace() + + // Script for creating concurrent tasks to trigger autoscaling + scriptsAC := newConfigMap(namespace.Name, files(test, "create_concurrent_tasks.py")) + scripts, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), scriptsAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", scripts.Namespace, scripts.Name) + + test.T().Run(name, func(_ *testing.T) { + groupName := "custom-resource-group" + rayClusterSpecAC := rayv1ac.RayClusterSpec(). + WithEnableInTreeAutoscaling(true). + WithRayVersion(GetRayVersion()). + WithHeadGroupSpec(rayv1ac.HeadGroupSpec(). + WithRayStartParams(map[string]string{"num-cpus": "0"}). + WithTemplate(tc.HeadPodTemplateGetter())). + WithWorkerGroupSpecs(rayv1ac.WorkerGroupSpec(). + WithReplicas(0). + WithMinReplicas(0). + WithMaxReplicas(maxReplica). + WithGroupName(groupName). + WithRayStartParams(map[string]string{"num-cpus": "1", "resources": `'{"CustomResource": 1}'`}). + WithTemplate(tc.WorkerPodTemplateGetter())). + WithAutoscalerOptions(rayv1ac.AutoscalerOptions(). + WithIdleTimeoutSeconds(scaleDownWaitSec)) + rayClusterAC := rayv1ac.RayCluster("ray-cluster", namespace.Name).
+ WithSpec(apply(rayClusterSpecAC, mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](scripts, "/home/ray/test_scripts"))) + + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + // Wait for RayCluster to become ready and verify the number of available worker replicas. + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterState, gomega.Equal(rayv1.Ready))) + g.Expect(GetRayCluster(test, rayCluster.Namespace, rayCluster.Name)).To(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0)))) + + headPod, err := GetHeadPod(test, rayCluster) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name) + + // Create a number of tasks and wait for their completion, and a worker in the "custom-resource-group" should be created. + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/create_concurrent_tasks.py"}) + + // Scale down has been disabled, after ray script execution completion the cluster is expected to have max replica's number of pods. 
+ pods, err := GetWorkerPods(test, rayCluster) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(pods).To(gomega.HaveLen(maxReplica)) + }) + + } +} + +func TestRayClusterAutoscalerMinReplicasUpdate(t *testing.T) { + for name, tc := range tests { + + test := With(t) + g := gomega.NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Script for creating detached actors to trigger autoscaling + scriptsAC := newConfigMap(namespace.Name, files(test, "create_detached_actor.py")) + scripts, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), scriptsAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", scripts.Namespace, scripts.Name) + + test.T().Run(name, func(_ *testing.T) { + groupName := "test-group" + + rayClusterSpecAC := rayv1ac.RayClusterSpec(). + WithEnableInTreeAutoscaling(true). + WithRayVersion(GetRayVersion()). + WithHeadGroupSpec(rayv1ac.HeadGroupSpec(). + WithRayStartParams(map[string]string{"num-cpus": "0"}). + WithTemplate(tc.HeadPodTemplateGetter())). + WithWorkerGroupSpecs(rayv1ac.WorkerGroupSpec(). + WithReplicas(1). + WithMinReplicas(0). + WithMaxReplicas(5). + WithGroupName(groupName). + WithRayStartParams(map[string]string{"num-cpus": "1"}). + WithTemplate(tc.WorkerPodTemplateGetter())) + rayClusterAC := rayv1ac.RayCluster("ray-cluster", namespace.Name). + WithSpec(apply(rayClusterSpecAC, mountConfigMap[rayv1ac.RayClusterSpecApplyConfiguration](scripts, "/home/ray/test_scripts"))) + + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + // Wait for RayCluster to become ready + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). 
+ Should(gomega.WithTransform(RayClusterState, gomega.Equal(rayv1.Ready))) + g.Expect(GetRayCluster(test, rayCluster.Namespace, rayCluster.Name)).To(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(1)))) + + // Update minReplicas from 0 to 2 + rayCluster, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Get(test.Ctx(), rayCluster.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + rayCluster.Spec.WorkerGroupSpecs[0].MinReplicas = ptr.To(int32(2)) + rayCluster, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Update(test.Ctx(), rayCluster, metav1.UpdateOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Updated RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + // Verify that KubeRay creates an additional Pod + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). + Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(2)))) + + // Create detached actors to trigger autoscaling to 5 Pods + headPod, err := GetHeadPod(test, rayCluster) + g.Expect(err).NotTo(gomega.HaveOccurred()) + test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name) + + for i := 0; i < 5; i++ { + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/create_detached_actor.py", fmt.Sprintf("actor%d", i)}) + } + + // Verify that the Autoscaler scales up to 5 Pods + g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium). 
+ Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(5)))) + + // Check that replicas is set to 5 + g.Expect(GetRayCluster(test, rayCluster.Namespace, rayCluster.Name)).To(gomega.WithTransform(GetRayClusterWorkerGroupReplicaSum, gomega.Equal(int32(5)))) + }) + } +} diff --git a/ray-operator/test/e2eautoscaler/support.go b/ray-operator/test/e2eautoscaler/support.go new file mode 100644 index 00000000000..545466ae94c --- /dev/null +++ b/ray-operator/test/e2eautoscaler/support.go @@ -0,0 +1,175 @@ +package e2eautoscaler + +import ( + "embed" + + "github.com/stretchr/testify/assert" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + corev1ac "k8s.io/client-go/applyconfigurations/core/v1" + + rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" + . "github.com/ray-project/kuberay/ray-operator/test/support" +) + +//go:embed *.py +var _files embed.FS + +func ReadFile(t Test, fileName string) []byte { + t.T().Helper() + file, err := _files.ReadFile(fileName) + assert.NoError(t.T(), err) + return file +} + +type option[T any] func(t *T) *T + +func apply[T any](t *T, options ...option[T]) *T { + for _, opt := range options { + t = opt(t) + } + return t +} + +func options[T any](options ...option[T]) option[T] { + return func(t *T) *T { + for _, opt := range options { + t = opt(t) + } + return t + } +} + +func newConfigMap(namespace string, options ...option[corev1ac.ConfigMapApplyConfiguration]) *corev1ac.ConfigMapApplyConfiguration { + cmAC := corev1ac.ConfigMap("scripts", namespace). + WithBinaryData(map[string][]byte{}). + WithImmutable(true) + + return configMapWith(cmAC, options...) +} + +func configMapWith(configMapAC *corev1ac.ConfigMapApplyConfiguration, options ...option[corev1ac.ConfigMapApplyConfiguration]) *corev1ac.ConfigMapApplyConfiguration { + return apply(configMapAC, options...) 
+} + +func file(t Test, fileName string) option[corev1ac.ConfigMapApplyConfiguration] { + return func(cmAC *corev1ac.ConfigMapApplyConfiguration) *corev1ac.ConfigMapApplyConfiguration { + cmAC.WithBinaryData(map[string][]byte{fileName: ReadFile(t, fileName)}) + return cmAC + } +} + +func files(t Test, fileNames ...string) option[corev1ac.ConfigMapApplyConfiguration] { + var files []option[corev1ac.ConfigMapApplyConfiguration] + for _, fileName := range fileNames { + files = append(files, file(t, fileName)) + } + return options(files...) +} + +func mountConfigMap[T rayv1ac.RayClusterSpecApplyConfiguration | corev1ac.PodTemplateSpecApplyConfiguration](configMap *corev1.ConfigMap, mountPath string) option[T] { + return func(t *T) *T { + switch obj := (interface{})(t).(type) { + case *rayv1ac.RayClusterSpecApplyConfiguration: + obj.HeadGroupSpec.Template.Spec.Containers[0].WithVolumeMounts(corev1ac.VolumeMount(). + WithName(configMap.Name). + WithMountPath(mountPath)) + obj.HeadGroupSpec.Template.Spec.WithVolumes(corev1ac.Volume(). + WithName(configMap.Name). + WithConfigMap(corev1ac.ConfigMapVolumeSource().WithName(configMap.Name))) + + case *corev1ac.PodTemplateSpecApplyConfiguration: + obj.Spec.Containers[0].WithVolumeMounts(corev1ac.VolumeMount(). + WithName(configMap.Name). + WithMountPath(mountPath)) + obj.Spec.WithVolumes(corev1ac.Volume(). + WithName(configMap.Name). + WithConfigMap(corev1ac.ConfigMapVolumeSource().WithName(configMap.Name))) + } + return t + } +} + +func headPodTemplateApplyConfiguration() *corev1ac.PodTemplateSpecApplyConfiguration { + return corev1ac.PodTemplateSpec(). + WithSpec(corev1ac.PodSpec(). + WithContainers(corev1ac.Container(). + WithName("ray-head"). + WithImage(GetRayImage()). 
+ WithPorts( + corev1ac.ContainerPort().WithName("gcs").WithContainerPort(6379), + corev1ac.ContainerPort().WithName("serve").WithContainerPort(8000), + corev1ac.ContainerPort().WithName("dashboard").WithContainerPort(8265), + corev1ac.ContainerPort().WithName("client").WithContainerPort(10001), + ). + WithResources(corev1ac.ResourceRequirements(). + WithRequests(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("300m"), + corev1.ResourceMemory: resource.MustParse("1G"), + }). + WithLimits(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("2G"), + })))) +} + +func headPodTemplateApplyConfigurationV2() *corev1ac.PodTemplateSpecApplyConfiguration { + return corev1ac.PodTemplateSpec(). + WithSpec(corev1ac.PodSpec(). + WithRestartPolicy(corev1.RestartPolicyNever). + WithContainers(corev1ac.Container(). + WithName("ray-head"). + WithImage(GetRayImage()). + WithPorts( + corev1ac.ContainerPort().WithName("gcs").WithContainerPort(6379), + corev1ac.ContainerPort().WithName("serve").WithContainerPort(8000), + corev1ac.ContainerPort().WithName("dashboard").WithContainerPort(8265), + corev1ac.ContainerPort().WithName("client").WithContainerPort(10001), + ). + WithEnv(corev1ac.EnvVar().WithName("RAY_enable_autoscaler_v2").WithValue("1")). + WithResources(corev1ac.ResourceRequirements(). + WithRequests(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("300m"), + corev1.ResourceMemory: resource.MustParse("1G"), + }). + WithLimits(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("2G"), + })))) +} + +func workerPodTemplateApplyConfiguration() *corev1ac.PodTemplateSpecApplyConfiguration { + return corev1ac.PodTemplateSpec(). + WithSpec(corev1ac.PodSpec(). + WithContainers(corev1ac.Container(). + WithName("ray-worker"). + WithImage(GetRayImage()). + WithResources(corev1ac.ResourceRequirements(). 
+ WithRequests(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("300m"), + corev1.ResourceMemory: resource.MustParse("1G"), + }). + WithLimits(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("1G"), + })))) +} + +func workerPodTemplateApplyConfigurationV2() *corev1ac.PodTemplateSpecApplyConfiguration { + return corev1ac.PodTemplateSpec(). + WithSpec(corev1ac.PodSpec(). + WithRestartPolicy(corev1.RestartPolicyNever). + WithContainers(corev1ac.Container(). + WithName("ray-worker"). + WithImage(GetRayImage()). + WithResources(corev1ac.ResourceRequirements(). + WithRequests(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("300m"), + corev1.ResourceMemory: resource.MustParse("1G"), + }). + WithLimits(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("1G"), + })))) +} diff --git a/ray-operator/test/e2eautoscaler/terminate_detached_actor.py b/ray-operator/test/e2eautoscaler/terminate_detached_actor.py new file mode 100644 index 00000000000..7625a243022 --- /dev/null +++ b/ray-operator/test/e2eautoscaler/terminate_detached_actor.py @@ -0,0 +1,6 @@ +import ray +import sys + +ray.init(namespace="default_namespace") +detached_actor = ray.get_actor(sys.argv[1]) +ray.kill(detached_actor) diff --git a/ray-operator/test/e2erayservice/locust_runner.py b/ray-operator/test/e2erayservice/locust_runner.py new file mode 100644 index 00000000000..80de9dfaf64 --- /dev/null +++ b/ray-operator/test/e2erayservice/locust_runner.py @@ -0,0 +1,132 @@ +""" +This script is based on: https://raw.githubusercontent.com/ray-project/serve_workloads/main/microbenchmarks/locust_runner.py + +Run Locust on Ray cluster. + +Run this script on a Ray cluster's head node to launch one Locust worker per +CPU across the Ray cluster's nodes. 
+ +Example command: + +$ python locust_runner.py -f locustfile.py -u 200 -r 50 --host [HOST_URL] +""" + +import os +import ray +import json +import time +import argparse +import sys +import subprocess +from tqdm import tqdm + + +ray.init() + +HTML_RESULTS_DIR = os.environ.get("HTML_RESULTS_DIR", "locust_results") +DEFAULT_RESULT_FILENAME = \ + f"{time.strftime('%Y-%m-%d-%p-%H-%M-%S-results.html')}" + +parser = argparse.ArgumentParser() +parser.add_argument( + "--html", + default=DEFAULT_RESULT_FILENAME, + type=str, + help="HTML file to save results to.", +) +parser.add_argument( + "-t", + "--run-time", + default=None, + type=str, + help="Test duration. Same option as Locust's --run-time.", +) + +args, locust_args = parser.parse_known_args() + +num_locust_workers = int(ray.available_resources()["CPU"]) +master_address = ray.util.get_node_ip_address() + +if not os.path.exists(HTML_RESULTS_DIR): + os.mkdir(HTML_RESULTS_DIR) + +# Required locust args: -f, -u, -r, --host, and any custom locustfile args +base_locust_cmd = [ + "locust", + "--headless", + f"--html={HTML_RESULTS_DIR}/{args.html}", + *locust_args, +] + + +@ray.remote(num_cpus=1) +class LocustWorker: + def __init__(self): + self.proc = None + + def start(self): + worker_locust_cmd = base_locust_cmd + [ + "--worker", + f"--master-host={master_address}", + ] + self.proc = subprocess.Popen(worker_locust_cmd) + + +print(f"Spawning {num_locust_workers} Locust worker Ray tasks.") + +# Hold reference to each locust worker to prevent them from being torn down +locust_workers = [] +start_refs = [] +for _ in tqdm(range(num_locust_workers)): + locust_worker = LocustWorker.remote() + locust_workers.append(locust_worker) + start_refs.append(locust_worker.start.remote()) + +print("Waiting for Locust worker processes to start.") + + +def wait_for_locust_workers(start_refs): + """Generator that yields whenever a worker process starts. + + Use with tqdm to track how many workers have started. 
If you don't need + tqdm, use ray.get(start_refs) instead of calling this function. + """ + + remaining_start_refs = start_refs + while remaining_start_refs: + finished_start_refs, remaining_start_refs = \ + ray.wait(remaining_start_refs) + for ref in finished_start_refs: + yield ray.get(ref) + + +# No-op for-loop to let tqdm track wait_for_locust_workers() progress +for _ in tqdm(wait_for_locust_workers(start_refs), total=num_locust_workers): + pass + +master_locust_cmd = base_locust_cmd + [ + "--master", + f"--expect-workers={num_locust_workers}", + "--json", +] +print(f"Locust command: {master_locust_cmd}") + +if args.run_time is not None: + master_locust_cmd += [f"--run-time={args.run_time}"] +proc = subprocess.Popen(master_locust_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +stdout, stderr = proc.communicate() + +print("STDOUT:", stdout.decode()) +print("STDERR:", stderr.decode()) + +data = json.loads(stdout.decode()) +assert len(data) == 1, f"data_len: {len(data)}" + +num_failures = data[0]["num_failures"] +num_requests = data[0]["num_requests"] + +assert num_failures == 0, f"num_failures: {num_failures}" +assert num_requests != 0, f"num_requests: {num_requests}" + +print("returncode:", proc.returncode) +sys.exit(proc.returncode) diff --git a/ray-operator/test/e2erayservice/rayservice_ha_test.go b/ray-operator/test/e2erayservice/rayservice_ha_test.go new file mode 100644 index 00000000000..0638007e1ea --- /dev/null +++ b/ray-operator/test/e2erayservice/rayservice_ha_test.go @@ -0,0 +1,289 @@ +package e2e + +import ( + "sync" + "testing" + "time" + + . "github.com/onsi/gomega" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" + "github.com/ray-project/kuberay/ray-operator/test/sampleyaml" + . 
"github.com/ray-project/kuberay/ray-operator/test/support" +) + +func TestStaticRayService(t *testing.T) { + rayserviceYamlFile := "testdata/rayservice.static.yaml" + locustYamlFile := "testdata/locust-cluster.const-rate.yaml" + + test := With(t) + g := NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Create a ConfigMap with Locust runner script + configMapAC := newConfigMap(namespace.Name, files(test, "locust_runner.py")) + configMap, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), configMapAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name) + + // Create the RayService for testing + KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name) + rayService, err := GetRayService(test, namespace.Name, "test-rayservice") + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayService %s/%s successfully", rayService.Namespace, rayService.Name) + + test.T().Logf("Waiting for RayService %s/%s to running", rayService.Namespace, rayService.Name) + g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium). + Should(WithTransform(RayServiceStatus, Equal(rayv1.Running))) + + // Create Locust RayCluster + KubectlApplyYAML(test, locustYamlFile, namespace.Name) + locustCluster, err := GetRayCluster(test, namespace.Name, "locust-cluster") + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created Locust RayCluster %s/%s successfully", locustCluster.Namespace, locustCluster.Name) + + g.Eventually(RayCluster(test, locustCluster.Namespace, locustCluster.Name), TestTimeoutMedium). 
+ Should(WithTransform(RayClusterState, Equal(rayv1.Ready))) + g.Expect(GetRayCluster(test, locustCluster.Namespace, locustCluster.Name)).To(WithTransform(RayClusterDesiredWorkerReplicas, Equal(int32(0)))) + + headPod, err := GetHeadPod(test, locustCluster) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name) + + // Install Locust in the head Pod + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"pip", "install", "locust"}) + + // Run Locust test + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{ + "python", "/locust-runner/locust_runner.py", "-f", "/locustfile/locustfile.py", "--host", "http://test-rayservice-serve-svc:8000", + }) +} + +func TestAutoscalingRayService(t *testing.T) { + rayserviceYamlFile := "testdata/rayservice.autoscaling.yaml" + locustYamlFile := "testdata/locust-cluster.burst.yaml" + numberOfPodsWhenSteady := 1 + + test := With(t) + g := NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Create a ConfigMap with Locust runner script + configMapAC := newConfigMap(namespace.Name, files(test, "locust_runner.py")) + configMap, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), configMapAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name) + + // Create the RayService for testing + KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name) + rayService, err := GetRayService(test, namespace.Name, "test-rayservice") + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayService %s/%s successfully", rayService.Namespace, rayService.Name) + + test.T().Logf("Waiting for RayService %s/%s to running", rayService.Namespace, rayService.Name) + g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium). 
+ Should(WithTransform(RayServiceStatus, Equal(rayv1.Running))) + + // Get the underlying RayCluster of the RayService + rayService, err = GetRayService(test, namespace.Name, rayService.Name) + g.Expect(err).NotTo(HaveOccurred()) + rayServiceUnderlyingRayCluster, err := GetRayCluster(test, namespace.Name, rayService.Status.ActiveServiceStatus.RayClusterName) + g.Expect(err).NotTo(HaveOccurred()) + + // Check the number of worker pods is correct when RayService is steady + g.Eventually(WorkerPods(test, rayServiceUnderlyingRayCluster), TestTimeoutShort).Should(HaveLen(numberOfPodsWhenSteady)) + + // Create Locust RayCluster + KubectlApplyYAML(test, locustYamlFile, namespace.Name) + locustCluster, err := GetRayCluster(test, namespace.Name, "locust-cluster") + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created Locust RayCluster %s/%s successfully", locustCluster.Namespace, locustCluster.Name) + + g.Eventually(RayCluster(test, locustCluster.Namespace, locustCluster.Name), TestTimeoutMedium). 
+ Should(WithTransform(RayClusterState, Equal(rayv1.Ready))) + g.Expect(GetRayCluster(test, locustCluster.Namespace, locustCluster.Name)).To(WithTransform(RayClusterDesiredWorkerReplicas, Equal(int32(0)))) + + headPod, err := GetHeadPod(test, locustCluster) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name) + + // Install Locust in the head Pod + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"pip", "install", "locust"}) + + // Run Locust test + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{ + "python", "/locust-runner/locust_runner.py", "-f", "/locustfile/locustfile.py", "--host", "http://test-rayservice-serve-svc:8000", + }) + + // Check the number of worker pods is more when RayService right after the burst + pods, err := GetWorkerPods(test, rayServiceUnderlyingRayCluster) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(pods)).Should(BeNumerically(">", numberOfPodsWhenSteady)) + + // Check the number of worker pods is correct when RayService is steady + g.Eventually(WorkerPods(test, rayServiceUnderlyingRayCluster), TestTimeoutLong).Should(HaveLen(numberOfPodsWhenSteady)) +} + +func TestRayServiceZeroDowntimeUpgrade(t *testing.T) { + rayserviceYamlFile := "testdata/rayservice.static.yaml" + locustYamlFile := "testdata/locust-cluster.const-rate.yaml" + + test := With(t) + g := NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Create a ConfigMap with Locust runner script + configMapAC := newConfigMap(namespace.Name, files(test, "locust_runner.py")) + configMap, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), configMapAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name) + + // Create the RayService for testing + KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name) + rayService, err := GetRayService(test, 
namespace.Name, "test-rayservice") + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayService %s/%s successfully", rayService.Namespace, rayService.Name) + + test.T().Logf("Waiting for RayService %s/%s to running", rayService.Namespace, rayService.Name) + g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium). + Should(WithTransform(RayServiceStatus, Equal(rayv1.Running))) + + // Create Locust RayCluster + KubectlApplyYAML(test, locustYamlFile, namespace.Name) + locustCluster, err := GetRayCluster(test, namespace.Name, "locust-cluster") + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created Locust RayCluster %s/%s successfully", locustCluster.Namespace, locustCluster.Name) + + g.Eventually(RayCluster(test, locustCluster.Namespace, locustCluster.Name), TestTimeoutMedium). + Should(WithTransform(RayClusterState, Equal(rayv1.Ready))) + g.Expect(GetRayCluster(test, locustCluster.Namespace, locustCluster.Name)).To(WithTransform(RayClusterDesiredWorkerReplicas, Equal(int32(0)))) + + headPod, err := GetHeadPod(test, locustCluster) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name) + + // Install Locust in the head Pod + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"pip", "install", "locust"}) + + // Start a goroutine to perform zero-downtime upgrade + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + + test.T().Logf("Waiting several seconds before updating RayService") + time.Sleep(30 * time.Second) + + test.T().Logf("Updating RayService") + rayService, err := GetRayService(test, namespace.Name, "test-rayservice") + g.Expect(err).NotTo(HaveOccurred()) + rayClusterName := rayService.Status.ActiveServiceStatus.RayClusterName + + newRayService := rayService.DeepCopy() + newRayService.Spec.RayClusterSpec.RayVersion = "" + newRayService, err = test.Client().Ray().RayV1().RayServices(newRayService.Namespace).Update(test.Ctx(), 
newRayService, metav1.UpdateOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + // Assert that the active RayCluster is eventually different + g.Eventually(RayService(test, newRayService.Namespace, newRayService.Name), TestTimeoutShort).Should(WithTransform(func(rayService *rayv1.RayService) string { + return rayService.Status.ActiveServiceStatus.RayClusterName + }, Not(Equal(rayClusterName)))) + }() + + // Run Locust test + ExecPodCmd(test, headPod, common.RayHeadContainer, []string{ + "python", "/locust-runner/locust_runner.py", "-f", "/locustfile/locustfile.py", "--host", "http://test-rayservice-serve-svc:8000", + }) + + wg.Wait() +} + +func TestRayServiceGCSFaultTolerance(t *testing.T) { + rayserviceYamlFile := "testdata/ray-service.ft.yaml" + locustYamlFile := "testdata/locust-cluster.const-rate.yaml" + + test := With(t) + g := NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + + // Create a ConfigMap with Locust runner script + configMapAC := newConfigMap(namespace.Name, files(test, "locust_runner.py")) + configMap, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), configMapAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name) + + // Create the RayService for testing + KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name) + rayService, err := GetRayService(test, namespace.Name, "test-rayservice") + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayService %s/%s successfully", rayService.Namespace, rayService.Name) + + test.T().Logf("Waiting for RayService %s/%s to running", rayService.Namespace, rayService.Name) + g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium). + Should(WithTransform(RayServiceStatus, Equal(rayv1.Running))) + + g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutShort). 
+ Should(WithTransform(RayServicesNumEndPoints, Equal(int32(1)))) + + // Get the underlying RayCluster of the RayService + rayService, err = GetRayService(test, namespace.Name, rayService.Name) + g.Expect(err).NotTo(HaveOccurred()) + rayServiceUnderlyingRayCluster, err := GetRayCluster(test, namespace.Name, rayService.Status.ActiveServiceStatus.RayClusterName) + g.Expect(err).NotTo(HaveOccurred()) + + // Create Locust RayCluster + KubectlApplyYAML(test, locustYamlFile, namespace.Name) + locustCluster, err := GetRayCluster(test, namespace.Name, "locust-cluster") + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created Locust RayCluster %s/%s successfully", locustCluster.Namespace, locustCluster.Name) + + g.Eventually(RayCluster(test, locustCluster.Namespace, locustCluster.Name), TestTimeoutMedium). + Should(WithTransform(RayClusterState, Equal(rayv1.Ready))) + g.Expect(GetRayCluster(test, locustCluster.Namespace, locustCluster.Name)).To(WithTransform(RayClusterDesiredWorkerReplicas, Equal(int32(0)))) + + locustHeadPod, err := GetHeadPod(test, locustCluster) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Found head pod %s/%s", locustHeadPod.Namespace, locustHeadPod.Name) + + // Install Locust in the Locust head Pod + ExecPodCmd(test, locustHeadPod, common.RayHeadContainer, []string{"pip", "install", "locust"}) + + // Get current head pod + oldHeadPod, err := GetHeadPod(test, rayServiceUnderlyingRayCluster) + g.Expect(err).NotTo(HaveOccurred()) + // Store the name of the head Pod in a variable + oldHeadPodName := oldHeadPod.Name + // Kill gcs server + ExecPodCmd(test, oldHeadPod, common.RayHeadContainer, []string{"pkill", "gcs_server"}) + // wait for head pod not to be ready + g.Eventually(HeadPod(test, rayServiceUnderlyingRayCluster), TestTimeoutShort).Should(WithTransform(sampleyaml.IsPodRunningAndReady, BeFalse())) + + startTime := time.Now() + // Run Locust test + ExecPodCmd(test, locustHeadPod, common.RayHeadContainer, []string{ + "python", 
"/locust-runner/locust_runner.py", "-f", "/locustfile/locustfile.py", "--host", "http://test-rayservice-serve-svc:8000", + }) + // Because this test shares the Locust RayCluster YAML file with other tests, + // we need to ensure the YAML file is not accidentally updated. + g.Expect(time.Since(startTime) > 2*time.Minute).To(BeTrue()) + + newHeadPod, err := GetHeadPod(test, rayServiceUnderlyingRayCluster) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(newHeadPod.Name).To(Equal(oldHeadPodName)) + g.Expect(newHeadPod.Status.ContainerStatuses[0].RestartCount).To(Equal(int32(1))) + // Verify that all pods are running + g.Expect(GetHeadPod(test, rayServiceUnderlyingRayCluster)).Should(WithTransform(sampleyaml.IsPodRunningAndReady, BeTrue())) + g.Expect(GetWorkerPods(test, rayServiceUnderlyingRayCluster)).Should(WithTransform(sampleyaml.AllPodsRunningAndReady, BeTrue())) +} diff --git a/ray-operator/test/e2erayservice/rayservice_in_place_update_test.go b/ray-operator/test/e2erayservice/rayservice_in_place_update_test.go new file mode 100644 index 00000000000..b2bb3ef0387 --- /dev/null +++ b/ray-operator/test/e2erayservice/rayservice_in_place_update_test.go @@ -0,0 +1,93 @@ +package e2e + +import ( + "strings" + "testing" + + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" + "github.com/ray-project/kuberay/ray-operator/test/sampleyaml" + . 
"github.com/ray-project/kuberay/ray-operator/test/support" +) + +func TestRayServiceInPlaceUpdate(t *testing.T) { + test := With(t) + g := NewWithT(t) + + // Create a namespace + namespace := test.NewTestNamespace() + rayServiceName := "rayservice-sample" + + rayServiceAC := rayv1ac.RayService(rayServiceName, namespace.Name).WithSpec(rayServiceSampleYamlApplyConfiguration()) + + // TODO: This test will fail on Ray 2.40.0. Pin the Ray version to 2.9.0 as a workaround. Need to remove this after the issue is fixed. + rayServiceAC.Spec.RayClusterSpec.WithRayVersion("2.9.0") + rayServiceAC.Spec.RayClusterSpec.HeadGroupSpec.Template.Spec.Containers[0].WithImage("rayproject/ray:2.9.0") + + rayService, err := test.Client().Ray().RayV1().RayServices(namespace.Name).Apply(test.Ctx(), rayServiceAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(rayService).NotTo(BeNil()) + + test.T().Logf("Waiting for RayService %s/%s to running", rayService.Namespace, rayService.Name) + g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium). 
+ Should(WithTransform(RayServiceStatus, Equal(rayv1.Running))) + + // Get the latest RayService + rayService, err = GetRayService(test, namespace.Name, rayServiceName) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(rayService).NotTo(BeNil()) + + // Create curl pod + curlPodName := "curl-pod" + curlContainerName := "curl-container" + + curlPod, err := CreateCurlPod(test, curlPodName, curlContainerName, namespace.Name) + g.Expect(err).NotTo(HaveOccurred()) + // Wait until curl pod is created + g.Eventually(func(g Gomega) *corev1.Pod { + updatedCurlPod, err := test.Client().Core().CoreV1().Pods(curlPod.Namespace).Get(test.Ctx(), curlPod.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(HaveOccurred()) + return updatedCurlPod + }, TestTimeoutShort).Should(WithTransform(sampleyaml.IsPodRunningAndReady, BeTrue())) + + // test the default curl result + g.Eventually(func(g Gomega) { + // curl /fruit + stdout, _ := curlRayServicePod(test, rayService, curlPod, curlContainerName, "/fruit", `["MANGO", 2]`) + g.Expect(stdout.String()).To(Equal("6")) + // curl /calc + stdout, _ = curlRayServicePod(test, rayService, curlPod, curlContainerName, "/calc", `["MUL", 3]`) + g.Expect(stdout.String()).To(Equal("15 pizzas please!")) + }, TestTimeoutShort).Should(Succeed()) + + // In-place update + // Parse ServeConfigV2 and replace the string in the simplest way to update it. 
+ rayService, err = GetRayService(test, namespace.Name, rayService.Name) + g.Expect(err).NotTo(HaveOccurred()) + + serveConfig := rayService.Spec.ServeConfigV2 + serveConfig = strings.Replace(serveConfig, "price: 3", "price: 4", -1) + serveConfig = strings.Replace(serveConfig, "factor: 5", "factor: 3", -1) + + rayService.Spec.ServeConfigV2 = serveConfig + rayService, err = test.Client().Ray().RayV1().RayServices(namespace.Name).Update( + test.Ctx(), + rayService, + metav1.UpdateOptions{}, + ) + g.Expect(err).NotTo(HaveOccurred()) + + // Test the new price and factor + g.Eventually(func(g Gomega) { + // curl /fruit + stdout, _ := curlRayServicePod(test, rayService, curlPod, curlContainerName, "/fruit", `["MANGO", 2]`) + g.Expect(stdout.String()).To(Equal("8")) + // curl /calc + stdout, _ = curlRayServicePod(test, rayService, curlPod, curlContainerName, "/calc", `["MUL", 3]`) + g.Expect(stdout.String()).To(Equal("9 pizzas please!")) + }, TestTimeoutShort).Should(Succeed()) +} diff --git a/ray-operator/test/e2erayservice/support.go b/ray-operator/test/e2erayservice/support.go new file mode 100644 index 00000000000..f77a92f9fbb --- /dev/null +++ b/ray-operator/test/e2erayservice/support.go @@ -0,0 +1,164 @@ +package e2e + +import ( + "bytes" + "embed" + "fmt" + + "github.com/stretchr/testify/assert" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + corev1ac "k8s.io/client-go/applyconfigurations/core/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1" + . 
"github.com/ray-project/kuberay/ray-operator/test/support" +) + +//go:embed *.py +var _files embed.FS + +func ReadFile(t Test, fileName string) []byte { + t.T().Helper() + file, err := _files.ReadFile(fileName) + assert.NoError(t.T(), err) + return file +} + +type option[T any] func(t *T) *T + +func apply[T any](t *T, options ...option[T]) *T { + for _, opt := range options { + t = opt(t) + } + return t +} + +func options[T any](options ...option[T]) option[T] { + return func(t *T) *T { + for _, opt := range options { + t = opt(t) + } + return t + } +} + +func newConfigMap(namespace string, options ...option[corev1ac.ConfigMapApplyConfiguration]) *corev1ac.ConfigMapApplyConfiguration { + cmAC := corev1ac.ConfigMap("locust-runner-script", namespace). + WithBinaryData(map[string][]byte{}). + WithImmutable(true) + + return configMapWith(cmAC, options...) +} + +func configMapWith(configMapAC *corev1ac.ConfigMapApplyConfiguration, options ...option[corev1ac.ConfigMapApplyConfiguration]) *corev1ac.ConfigMapApplyConfiguration { + return apply(configMapAC, options...) +} + +func file(t Test, fileName string) option[corev1ac.ConfigMapApplyConfiguration] { + return func(cmAC *corev1ac.ConfigMapApplyConfiguration) *corev1ac.ConfigMapApplyConfiguration { + cmAC.WithBinaryData(map[string][]byte{fileName: ReadFile(t, fileName)}) + return cmAC + } +} + +func files(t Test, fileNames ...string) option[corev1ac.ConfigMapApplyConfiguration] { + var files []option[corev1ac.ConfigMapApplyConfiguration] + for _, fileName := range fileNames { + files = append(files, file(t, fileName)) + } + return options(files...) 
+} + +func curlRayServicePod( + t Test, + rayService *rayv1.RayService, + curlPod *corev1.Pod, + curlPodContainerName, + rayServicePath, + body string, +) (bytes.Buffer, bytes.Buffer) { + cmd := []string{ + "curl", + "-X", "POST", + "-H", "Content-Type: application/json", + fmt.Sprintf("%s-serve-svc.%s.svc.cluster.local:8000%s", rayService.Name, rayService.Namespace, rayServicePath), + "-d", body, + } + + return ExecPodCmd(t, curlPod, curlPodContainerName, cmd) +} + +func rayServiceSampleYamlApplyConfiguration() *rayv1ac.RayServiceSpecApplyConfiguration { + return rayv1ac.RayServiceSpec().WithServeConfigV2(`applications: + - name: fruit_app + import_path: fruit.deployment_graph + route_prefix: /fruit + runtime_env: + working_dir: "https://github.com/ray-project/test_dag/archive/78b4a5da38796123d9f9ffff59bab2792a043e95.zip" + deployments: + - name: MangoStand + num_replicas: 1 + user_config: + price: 3 + ray_actor_options: + num_cpus: 0.1 + - name: OrangeStand + num_replicas: 1 + user_config: + price: 2 + ray_actor_options: + num_cpus: 0.1 + - name: FruitMarket + num_replicas: 1 + ray_actor_options: + num_cpus: 0.1 + - name: math_app + import_path: conditional_dag.serve_dag + route_prefix: /calc + runtime_env: + working_dir: "https://github.com/ray-project/test_dag/archive/78b4a5da38796123d9f9ffff59bab2792a043e95.zip" + deployments: + - name: Adder + num_replicas: 1 + user_config: + increment: 3 + ray_actor_options: + num_cpus: 0.1 + - name: Multiplier + num_replicas: 1 + user_config: + factor: 5 + ray_actor_options: + num_cpus: 0.1 + - name: Router + ray_actor_options: + num_cpus: 0.1 + num_replicas: 1`). + WithRayClusterSpec(rayv1ac.RayClusterSpec(). + WithRayVersion(GetRayVersion()). + WithHeadGroupSpec(rayv1ac.HeadGroupSpec(). + WithRayStartParams(map[string]string{"dashboard-host": "0.0.0.0"}). + WithTemplate(corev1ac.PodTemplateSpec(). + WithSpec(corev1ac.PodSpec(). + WithContainers(corev1ac.Container(). + WithName("ray-head"). + WithImage(GetRayImage()). 
+ WithPorts( + corev1ac.ContainerPort().WithName("gcs-server").WithContainerPort(6379), + corev1ac.ContainerPort().WithName("serve").WithContainerPort(8000), + corev1ac.ContainerPort().WithName("dashboard").WithContainerPort(8265), + corev1ac.ContainerPort().WithName("client").WithContainerPort(10001), + ). + WithResources(corev1ac.ResourceRequirements(). + WithRequests(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }). + WithLimits(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + })))))), + ) +} diff --git a/ray-operator/test/e2erayservice/testdata/locust-cluster.burst.yaml b/ray-operator/test/e2erayservice/testdata/locust-cluster.burst.yaml new file mode 100644 index 00000000000..6f158e7c749 --- /dev/null +++ b/ray-operator/test/e2erayservice/testdata/locust-cluster.burst.yaml @@ -0,0 +1,71 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: locust-cluster +spec: + rayVersion: '2.9.0' + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + resources: + requests: + cpu: 300m + memory: 1G + limits: + cpu: 500m + memory: 2G + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + volumeMounts: + - mountPath: /locustfile + name: locustfile-volume + - mountPath: /locust-runner + name: locust-runner-volume + volumes: + - name: locustfile-volume + configMap: + name: locustfile-config + - name: locust-runner-volume + configMap: + name: locust-runner-script +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: locustfile-config +data: + locustfile.py: | + from locust import FastHttpUser, task, constant, LoadTestShape + import os + + class ConstantUser(FastHttpUser): + wait_time = constant(1) + network_timeout = None + connection_timeout = None + + @task + def 
hello_world(self): + self.client.post("/") + + class StagesShape(LoadTestShape): + stages = [ + {"duration": 30, "users": 10, "spawn_rate": 10}, + {"duration": 60, "users": 120, "spawn_rate": 10}, + ] + + def tick(self): + run_time = self.get_run_time() + for stage in self.stages: + if run_time < stage["duration"]: + tick_data = (stage["users"], stage["spawn_rate"]) + return tick_data + return None diff --git a/ray-operator/test/e2erayservice/testdata/locust-cluster.const-rate.yaml b/ray-operator/test/e2erayservice/testdata/locust-cluster.const-rate.yaml new file mode 100644 index 00000000000..b4db3821260 --- /dev/null +++ b/ray-operator/test/e2erayservice/testdata/locust-cluster.const-rate.yaml @@ -0,0 +1,70 @@ +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + name: locust-cluster +spec: + rayVersion: '2.9.0' + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + resources: + requests: + cpu: 300m + memory: 1G + limits: + cpu: 500m + memory: 2G + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + volumeMounts: + - mountPath: /locustfile + name: locustfile-volume + - mountPath: /locust-runner + name: locust-runner-volume + volumes: + - name: locustfile-volume + configMap: + name: locustfile-config + - name: locust-runner-volume + configMap: + name: locust-runner-script +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: locustfile-config +data: + locustfile.py: | + from locust import FastHttpUser, task, constant, LoadTestShape + import os + + class ConstantUser(FastHttpUser): + wait_time = constant(1) + network_timeout = None + connection_timeout = None + + @task + def hello_world(self): + self.client.post("/") + + class StagesShape(LoadTestShape): + stages = [ + {"duration": 150, "users": 10, "spawn_rate": 10}, + ] + + def tick(self): + run_time = self.get_run_time() + for stage in self.stages: + if 
run_time < stage["duration"]: + tick_data = (stage["users"], stage["spawn_rate"]) + return tick_data + return None diff --git a/ray-operator/test/e2erayservice/testdata/ray-service.ft.yaml b/ray-operator/test/e2erayservice/testdata/ray-service.ft.yaml new file mode 100644 index 00000000000..e56282963e1 --- /dev/null +++ b/ray-operator/test/e2erayservice/testdata/ray-service.ft.yaml @@ -0,0 +1,161 @@ +--- +apiVersion: ray.io/v1 +kind: RayService +metadata: + name: test-rayservice + annotations: + ray.io/ft-enabled: "true" +spec: + excludeHeadPodFromServeSvc: true + serveConfigV2: | + applications: + - name: no_ops + route_prefix: / + import_path: microbenchmarks.no_ops:app_builder + args: + num_forwards: 0 + runtime_env: + working_dir: https://github.com/ray-project/serve_workloads/archive/a9f184f4d9ddb7f9a578502ae106470f87a702ef.zip + deployments: + - name: NoOp + num_replicas: 1 + ray_actor_options: + num_cpus: 1 + rayClusterConfig: + rayVersion: "2.9.0" + headGroupSpec: + rayStartParams: + num-cpus: "0" + redis-password: $REDIS_PASSWORD + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.40.0 + env: + - name: RAY_REDIS_ADDRESS + value: redis:6379 + # This environment variable is used in the `rayStartParams` above. 
+ - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: redis-password-secret + key: password + - name: RAY_gcs_rpc_server_reconnect_timeout_s + value: "20" + resources: + requests: + cpu: 300m + memory: 1G + limits: + cpu: 500m + memory: 2G + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 1 + groupName: small-group + rayStartParams: + num-cpus: "1" + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.40.0 + resources: + requests: + cpu: 300m + memory: 1G + limits: + cpu: 500m + memory: 1G +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: redis-config + labels: + app: redis +data: + redis.conf: |- + dir /data + port 6379 + bind 0.0.0.0 + appendonly yes + protected-mode no + requirepass 5241590000000000 + pidfile /data/redis-6379.pid +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + type: ClusterIP + ports: + - name: redis + port: 6379 + selector: + app: redis +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + app: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis:5.0.8 + command: + - "sh" + - "-c" + - "redis-server /usr/local/etc/redis/redis.conf" + ports: + - containerPort: 6379 + volumeMounts: + - name: config + mountPath: /usr/local/etc/redis/redis.conf + subPath: redis.conf + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "256Mi" + cpu: "500m" + volumes: + - name: config + configMap: + name: redis-config +--- +# Redis password +apiVersion: v1 +kind: Secret +metadata: + name: redis-password-secret +type: Opaque +data: + # echo -n "5241590000000000" | base64 + password: NTI0MTU5MDAwMDAwMDAwMA== diff --git 
a/ray-operator/test/e2erayservice/testdata/rayservice.autoscaling.yaml b/ray-operator/test/e2erayservice/testdata/rayservice.autoscaling.yaml new file mode 100644 index 00000000000..59e90f62b1d --- /dev/null +++ b/ray-operator/test/e2erayservice/testdata/rayservice.autoscaling.yaml @@ -0,0 +1,79 @@ +apiVersion: ray.io/v1 +kind: RayService +metadata: + name: test-rayservice +spec: + serveConfigV2: | + applications: + - name: no_ops + route_prefix: / + import_path: microbenchmarks.no_ops:app_builder + args: + num_forwards: 0 + runtime_env: + working_dir: https://github.com/ray-project/serve_workloads/archive/a2e2405f3117f1b4134b6924b5f44c4ff0710c00.zip + deployments: + - name: NoOp + autoscaling_config: + metrics_interval_s: 0.2 + min_replicas: 1 + max_replicas: 14 + look_back_period_s: 2 + downscale_delay_s: 5 + upscale_delay_s: 2 + target_num_ongoing_requests_per_replica: 1 + graceful_shutdown_timeout_s: 5 + max_concurrent_queries: 1000 + ray_actor_options: + num_cpus: 0.5 + rayClusterConfig: + rayVersion: '2.9.0' + enableInTreeAutoscaling: true + autoscalerOptions: + idleTimeoutSeconds: 60 + headGroupSpec: + rayStartParams: + num-cpus: "0" + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + resources: + requests: + cpu: 300m + memory: 1G + limits: + cpu: 500m + memory: 2G + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + workerGroupSpecs: + - replicas: 0 + minReplicas: 0 + maxReplicas: 5 + groupName: small-group + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.9.0 + lifecycle: + preStop: + exec: + command: ["/bin/sh","-c","ray stop"] + resources: + requests: + cpu: 300m + memory: 1G + limits: + cpu: 500m + memory: 1G diff --git a/ray-operator/test/e2erayservice/testdata/rayservice.static.yaml b/ray-operator/test/e2erayservice/testdata/rayservice.static.yaml 
new file mode 100644 index 00000000000..e4823e9bea6 --- /dev/null +++ b/ray-operator/test/e2erayservice/testdata/rayservice.static.yaml @@ -0,0 +1,65 @@ +apiVersion: ray.io/v1 +kind: RayService +metadata: + name: test-rayservice +spec: + serveConfigV2: | + proxy_location: EveryNode + applications: + - name: no_ops + route_prefix: / + import_path: microbenchmarks.no_ops:app_builder + args: + num_forwards: 0 + runtime_env: + working_dir: https://github.com/ray-project/serve_workloads/archive/a2e2405f3117f1b4134b6924b5f44c4ff0710c00.zip + deployments: + - name: NoOp + num_replicas: 2 + max_replicas_per_node: 1 + ray_actor_options: + num_cpus: 1 + rayClusterConfig: + rayVersion: '2.9.0' + headGroupSpec: + rayStartParams: {} + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.9.0 + resources: + requests: + cpu: 300m + memory: 1G + limits: + cpu: 500m + memory: 2G + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + workerGroupSpecs: + - replicas: 1 + minReplicas: 1 + maxReplicas: 1 + groupName: small-group + rayStartParams: + num-cpus: "1" + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.9.0 + resources: + requests: + cpu: 300m + memory: 1G + limits: + cpu: 500m + memory: 1G diff --git a/ray-operator/test/sampleyaml/raycluster_test.go b/ray-operator/test/sampleyaml/raycluster_test.go new file mode 100644 index 00000000000..85dfeb51a2b --- /dev/null +++ b/ray-operator/test/sampleyaml/raycluster_test.go @@ -0,0 +1,123 @@ +package sampleyaml + +import ( + "path" + "testing" + + . "github.com/onsi/gomega" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + . 
"github.com/ray-project/kuberay/ray-operator/test/support" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestRayCluster(t *testing.T) { + tests := []struct { + name string + }{ + { + name: "ray-cluster.autoscaler-v2.yaml", + }, + { + name: "ray-cluster.autoscaler.yaml", + }, + { + name: "ray-cluster.complete.yaml", + }, + { + name: "ray-cluster.custom-head-service.yaml", + }, + { + name: "ray-cluster.embed-grafana.yaml", + }, + { + name: "ray-cluster.external-redis-uri.yaml", + }, + { + name: "ray-cluster.external-redis.yaml", + }, + { + name: "ray-cluster.head-command.yaml", + }, + { + name: "ray-cluster.heterogeneous.yaml", + }, + { + name: "ray-cluster.overwrite-command.yaml", + }, + { + name: "ray-cluster.py-spy.yaml", + }, + { + name: "ray-cluster.sample.yaml", + }, + { + name: "ray-cluster.separate-ingress.yaml", + }, + { + name: "ray-cluster.tls.yaml", + }, + { + name: "ray-cluster.fluentbit.yaml", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + test := With(t) + g := NewWithT(t) + + yamlFilePath := path.Join(GetSampleYAMLDir(test), tt.name) + namespace := test.NewTestNamespace() + rayClusterFromYaml := DeserializeRayClusterYAML(test, yamlFilePath) + KubectlApplyYAML(test, yamlFilePath, namespace.Name) + + rayCluster, err := GetRayCluster(test, namespace.Name, rayClusterFromYaml.Name) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(rayCluster).NotTo(BeNil()) + + test.T().Logf("Waiting for RayCluster %s/%s to be ready", namespace.Name, rayCluster.Name) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
+ Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(RayClusterState, Equal(rayv1.Ready))) + rayCluster, err = GetRayCluster(test, namespace.Name, rayCluster.Name) + g.Expect(err).NotTo(HaveOccurred()) + + // Check if the RayCluster created correct number of pods + var desiredWorkerReplicas int32 + if rayCluster.Spec.WorkerGroupSpecs != nil { + for _, workerGroupSpec := range rayCluster.Spec.WorkerGroupSpecs { + desiredWorkerReplicas += *workerGroupSpec.Replicas + } + } + g.Eventually(WorkerPods(test, rayCluster), TestTimeoutShort).Should(HaveLen(int(desiredWorkerReplicas))) + g.Expect(GetRayCluster(test, namespace.Name, rayCluster.Name)).To(WithTransform(RayClusterDesiredWorkerReplicas, Equal(desiredWorkerReplicas))) + + // Check if the head pod is ready + g.Eventually(HeadPod(test, rayCluster), TestTimeoutShort).Should(WithTransform(IsPodRunningAndReady, BeTrue())) + + // Check if all worker pods are ready + g.Eventually(WorkerPods(test, rayCluster), TestTimeoutShort).Should(WithTransform(AllPodsRunningAndReady, BeTrue())) + + // Check that all pods can submit jobs + g.Eventually(SubmitJobsToAllPods(test, rayCluster), TestTimeoutShort).Should(Succeed()) + + // Delete all pods after setting quota to 0 to avoid recreating pods + KubectlApplyQuota(test, namespace.Name, "--hard=cpu=0,memory=0G,pods=0") + KubectlDeleteAllPods(test, namespace.Name) + // The HeadPodReady condition should now be False with a HeadPodNotFound reason. + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionFalse, rayv1.HeadPodNotFound))) + // The RayClusterProvisioned condition should still be True. 
+ g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) + // The RayClusterReplicaFailure condition now be True with a FailedCreateHeadPod reason due to the quota limit. + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterReplicaFailure), MatchCondition(metav1.ConditionTrue, "FailedCreateHeadPod"))) + }) + } +} diff --git a/ray-operator/test/sampleyaml/rayjob_test.go b/ray-operator/test/sampleyaml/rayjob_test.go new file mode 100644 index 00000000000..141d92ba5e2 --- /dev/null +++ b/ray-operator/test/sampleyaml/rayjob_test.go @@ -0,0 +1,83 @@ +package sampleyaml + +import ( + "path" + "testing" + + . "github.com/onsi/gomega" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + . "github.com/ray-project/kuberay/ray-operator/test/support" +) + +func TestRayJob(t *testing.T) { + tests := []struct { + name string + }{ + { + name: "ray-job.custom-head-svc.yaml", + }, + { + name: "ray-job.modin.yaml", + }, + { + name: "ray-job.resources.yaml", + }, + { + name: "ray-job.sample.yaml", + }, + { + name: "ray-job.shutdown.yaml", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + test := With(t) + g := NewWithT(t) + + yamlFilePath := path.Join(GetSampleYAMLDir(test), tt.name) + namespace := test.NewTestNamespace() + rayJobFromYaml := DeserializeRayJobYAML(test, yamlFilePath) + KubectlApplyYAML(test, yamlFilePath, namespace.Name) + + rayJob, err := GetRayJob(test, namespace.Name, rayJobFromYaml.Name) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(rayJob).NotTo(BeNil()) + + // Wait for RayCluster name to be populated + g.Eventually(RayJob(test, rayJob.Namespace, rayJob.Name), TestTimeoutShort). 
+ Should(WithTransform(RayJobClusterName, Not(BeEmpty()))) + + rayJob, err = GetRayJob(test, rayJob.Namespace, rayJob.Name) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(rayJob).NotTo(BeNil()) + + test.T().Logf("Waiting for RayCluster %s/%s to be ready", namespace.Name, rayJob.Status.RayClusterName) + g.Eventually(RayCluster(test, namespace.Name, rayJob.Status.RayClusterName), TestTimeoutMedium). + Should(WithTransform(RayClusterState, Equal(rayv1.Ready))) + rayCluster, err := GetRayCluster(test, namespace.Name, rayJob.Status.RayClusterName) + g.Expect(err).NotTo(HaveOccurred()) + + // Check if the RayCluster created correct number of pods + var desiredWorkerReplicas int32 + if rayCluster.Spec.WorkerGroupSpecs != nil { + for _, workerGroupSpec := range rayCluster.Spec.WorkerGroupSpecs { + desiredWorkerReplicas += *workerGroupSpec.Replicas + } + } + g.Eventually(WorkerPods(test, rayCluster), TestTimeoutShort).Should(HaveLen(int(desiredWorkerReplicas))) + g.Expect(GetRayCluster(test, namespace.Name, rayCluster.Name)).To(WithTransform(RayClusterDesiredWorkerReplicas, Equal(desiredWorkerReplicas))) + + // Check if the head pod is ready + g.Eventually(HeadPod(test, rayCluster), TestTimeoutShort).Should(WithTransform(IsPodRunningAndReady, BeTrue())) + + // Check if all worker pods are ready + g.Eventually(WorkerPods(test, rayCluster), TestTimeoutShort).Should(WithTransform(AllPodsRunningAndReady, BeTrue())) + + g.Eventually(RayJob(test, namespace.Name, rayJobFromYaml.Name), TestTimeoutMedium).Should(WithTransform(RayJobDeploymentStatus, Equal(rayv1.JobDeploymentStatusComplete))) + + g.Eventually(RayJob(test, namespace.Name, rayJobFromYaml.Name), TestTimeoutMedium).Should(WithTransform(RayJobStatus, Equal(rayv1.JobStatusSucceeded))) + }) + } +} diff --git a/ray-operator/test/sampleyaml/rayservice_test.go b/ray-operator/test/sampleyaml/rayservice_test.go new file mode 100644 index 00000000000..85f820bc39b --- /dev/null +++ 
b/ray-operator/test/sampleyaml/rayservice_test.go @@ -0,0 +1,74 @@ +package sampleyaml + +import ( + "path" + "testing" + + . "github.com/onsi/gomega" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + . "github.com/ray-project/kuberay/ray-operator/test/support" +) + +func TestRayService(t *testing.T) { + tests := []struct { + name string + }{ + { + name: "ray-service.custom-serve-service.yaml", + }, + { + name: "ray-service.different-port.yaml", + }, + { + name: "ray-service.high-availability.yaml", + }, + { + name: "ray-service.sample.yaml", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + test := With(t) + g := NewWithT(t) + + yamlFilePath := path.Join(GetSampleYAMLDir(test), tt.name) + namespace := test.NewTestNamespace() + rayServiceFromYaml := DeserializeRayServiceYAML(test, yamlFilePath) + KubectlApplyYAML(test, yamlFilePath, namespace.Name) + + rayService, err := GetRayService(test, namespace.Name, rayServiceFromYaml.Name) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(rayService).NotTo(BeNil()) + + test.T().Logf("Waiting for RayService %s/%s to running", rayService.Namespace, rayService.Name) + g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium). 
+ Should(WithTransform(RayServiceStatus, Equal(rayv1.Running))) + // Get the latest rayService + rayService, err = GetRayService(test, namespace.Name, rayServiceFromYaml.Name) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(rayService).NotTo(BeNil()) + + rayClusterName := rayService.Status.ActiveServiceStatus.RayClusterName + + rayCluster, err := GetRayCluster(test, namespace.Name, rayClusterName) + g.Expect(err).NotTo(HaveOccurred()) + + // Check if the head pod is ready + g.Eventually(HeadPod(test, rayCluster), TestTimeoutShort).Should(WithTransform(IsPodRunningAndReady, BeTrue())) + + // Check if .status.numServeEndpoints is greater than zero + g.Eventually(func(g Gomega) int32 { + rs, err := GetRayService(test, namespace.Name, rayServiceFromYaml.Name) + g.Expect(err).NotTo(HaveOccurred()) + return rs.Status.NumServeEndpoints + }, TestTimeoutShort).Should(BeNumerically(">", 0)) + + // Check if all applications are running + g.Eventually(RayService(test, namespace.Name, rayServiceFromYaml.Name), TestTimeoutMedium).Should(WithTransform(AllAppsRunning, BeTrue())) + // Query dashboard to get the serve application status in head pod + g.Eventually(QueryDashboardGetAppStatus(test, rayCluster), TestTimeoutShort).Should(Succeed()) + }) + } +} diff --git a/ray-operator/test/sampleyaml/support.go b/ray-operator/test/sampleyaml/support.go new file mode 100644 index 00000000000..63819b89a16 --- /dev/null +++ b/ray-operator/test/sampleyaml/support.go @@ -0,0 +1,110 @@ +package sampleyaml + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + + . "github.com/onsi/gomega" + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils" + . 
"github.com/ray-project/kuberay/ray-operator/test/support" +) + +func GetSampleYAMLDir(t Test) string { + t.T().Helper() + _, b, _, _ := runtime.Caller(0) + sampleYAMLDir := filepath.Join(filepath.Dir(b), "../../config/samples") + info, err := os.Stat(sampleYAMLDir) + assert.NoError(t.T(), err) + assert.True(t.T(), info.IsDir()) + return sampleYAMLDir +} + +func IsPodRunningAndReady(pod *corev1.Pod) bool { + if pod.Status.Phase != corev1.PodRunning { + return false + } + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + return true + } + } + return false +} + +func AllPodsRunningAndReady(pods []corev1.Pod) bool { + for _, pod := range pods { + if !IsPodRunningAndReady(&pod) { + return false + } + } + return true +} + +func SubmitJobsToAllPods(t Test, rayCluster *rayv1.RayCluster) func(Gomega) { + return func(g Gomega) { + pods, err := GetAllPods(t, rayCluster) + g.Expect(err).NotTo(HaveOccurred()) + cmd := []string{ + "python", + "-c", + "import ray; ray.init(); print(ray.cluster_resources())", + } + for _, pod := range pods { + container := pod.Spec.Containers[utils.RayContainerIndex] // Directly access the Ray container + ExecPodCmd(t, &pod, container.Name, cmd) + } + } +} + +func getApps(rayService *rayv1.RayService) map[string]rayv1.AppStatus { + apps := make(map[string]rayv1.AppStatus) + for k, v := range rayService.Status.ActiveServiceStatus.Applications { + apps[k] = v + } + return apps +} + +func AllAppsRunning(rayService *rayv1.RayService) bool { + appStatuses := getApps(rayService) + if len(appStatuses) == 0 { + return false + } + + for _, appStatus := range appStatuses { + if appStatus.Status != rayv1.ApplicationStatusEnum.RUNNING { + return false + } + } + return true +} + +func QueryDashboardGetAppStatus(t Test, rayCluster *rayv1.RayCluster) func(Gomega) { + return func(g Gomega) { + rayDashboardClient := &utils.RayDashboardClient{} + pod, err := GetHeadPod(t, 
rayCluster) + g.Expect(err).ToNot(HaveOccurred()) + + localPort := 8265 + remotePort := 8265 + stopChan, err := SetupPortForward(t, pod.Name, pod.Namespace, localPort, remotePort) + defer close(stopChan) + + g.Expect(err).ToNot(HaveOccurred()) + url := fmt.Sprintf("127.0.0.1:%d", localPort) + + err = rayDashboardClient.InitClient(t.Ctx(), url, rayCluster) + g.Expect(err).ToNot(HaveOccurred()) + serveDetails, err := rayDashboardClient.GetServeDetails(t.Ctx()) + g.Expect(err).ToNot(HaveOccurred()) + + for _, value := range serveDetails.Applications { + g.Expect(value.ServeApplicationStatus.Status).To(Equal(rayv1.ApplicationStatusEnum.RUNNING)) + } + } +} diff --git a/ray-operator/test/support/batch.go b/ray-operator/test/support/batch.go index cd86d0a9653..acbef608826 100644 --- a/ray-operator/test/support/batch.go +++ b/ray-operator/test/support/batch.go @@ -22,8 +22,3 @@ func Job(t Test, namespace, name string) func(g gomega.Gomega) *batchv1.Job { return job } } - -func GetJob(t Test, namespace, name string) *batchv1.Job { - t.T().Helper() - return Job(t, namespace, name)(t) -} diff --git a/ray-operator/test/support/client.go b/ray-operator/test/support/client.go index 673ef51274e..3947da5dbf9 100644 --- a/ray-operator/test/support/client.go +++ b/ray-operator/test/support/client.go @@ -3,6 +3,7 @@ package support import ( "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) 
@@ -16,12 +17,14 @@ type Client interface { Core() kubernetes.Interface Ray() rayclient.Interface Dynamic() dynamic.Interface + Config() rest.Config } type testClient struct { core kubernetes.Interface ray rayclient.Interface dynamic dynamic.Interface + config rest.Config } var _ Client = (*testClient)(nil) @@ -38,6 +41,10 @@ func (t *testClient) Dynamic() dynamic.Interface { return t.dynamic } +func (t *testClient) Config() rest.Config { + return t.config +} + func newTestClient() (Client, error) { cfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig( clientcmd.NewDefaultClientConfigLoadingRules(), @@ -66,5 +73,6 @@ func newTestClient() (Client, error) { core: kubeClient, ray: rayClient, dynamic: dynamicClient, + config: *cfg, }, nil } diff --git a/ray-operator/test/support/core.go b/ray-operator/test/support/core.go index 7942245d7a0..97c6d13d4cb 100644 --- a/ray-operator/test/support/core.go +++ b/ray-operator/test/support/core.go @@ -1,12 +1,22 @@ package support import ( + "bytes" + "fmt" "io" + "net/http" + "strings" + . 
"github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/stretchr/testify/assert" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/remotecommand" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/portforward" + "k8s.io/client-go/transport/spdy" ) func Pods(t Test, namespace string, options ...Option[*metav1.ListOptions]) func(g gomega.Gomega) []corev1.Pod { @@ -14,7 +24,7 @@ func Pods(t Test, namespace string, options ...Option[*metav1.ListOptions]) func listOptions := &metav1.ListOptions{} for _, option := range options { - t.Expect(option.applyTo(listOptions)).To(gomega.Succeed()) + g.Expect(option.applyTo(listOptions)).To(gomega.Succeed()) } pods, err := t.Client().Core().CoreV1().Pods(namespace).List(t.Ctx(), *listOptions) @@ -27,7 +37,7 @@ func storeAllPodLogs(t Test, namespace *corev1.Namespace) { t.T().Helper() pods, err := t.Client().Core().CoreV1().Pods(namespace.Name).List(t.Ctx(), metav1.ListOptions{}) - t.Expect(err).NotTo(gomega.HaveOccurred()) + assert.NoError(t.T(), err) for _, pod := range pods.Items { for _, container := range pod.Spec.Containers { @@ -46,15 +56,114 @@ func storeContainerLog(t Test, namespace *corev1.Namespace, podName, containerNa t.T().Logf("Error getting logs from container %s/%s/%s", namespace.Name, podName, containerName) return } - t.Expect(err).NotTo(gomega.HaveOccurred()) + assert.NoError(t.T(), err) defer func() { - t.Expect(stream.Close()).To(gomega.Succeed()) + assert.NoError(t.T(), stream.Close()) }() bytes, err := io.ReadAll(stream) - t.Expect(err).NotTo(gomega.HaveOccurred()) + assert.NoError(t.T(), err) containerLogFileName := "pod-" + podName + "-" + containerName WriteToOutputDir(t, containerLogFileName, Log, bytes) } + +func ExecPodCmd(t Test, pod *corev1.Pod, containerName string, cmd []string) (bytes.Buffer, bytes.Buffer) { + req := t.Client().Core().CoreV1().RESTClient(). + Post(). + Resource("pods"). + Name(pod.Name). 
+ Namespace(pod.Namespace). + SubResource("exec"). + VersionedParams(&corev1.PodExecOptions{ + Command: cmd, + Container: containerName, + Stdin: false, + Stdout: true, + Stderr: true, + TTY: false, + }, clientgoscheme.ParameterCodec) + + t.T().Logf("Executing command: %s", cmd) + cfg := t.Client().Config() + exec, err := remotecommand.NewSPDYExecutor(&cfg, "POST", req.URL()) + assert.NoError(t.T(), err) + // Capture the output streams + var stdout, stderr bytes.Buffer + // Execute the command in the pod + err = exec.StreamWithContext(t.Ctx(), remotecommand.StreamOptions{ + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + Tty: false, + }) + t.T().Logf("Command stdout: %s", stdout.String()) + t.T().Logf("Command stderr: %s", stderr.String()) + assert.NoError(t.T(), err) + return stdout, stderr +} + +func SetupPortForward(t Test, podName, namespace string, localPort, remotePort int) (chan struct{}, error) { + cfg := t.Client().Config() + + req := t.Client().Core().CoreV1().RESTClient(). + Post(). + Resource("pods"). + Namespace(namespace). + Name(podName). 
+ SubResource("portforward") + + transport, upgrader, err := spdy.RoundTripperFor(&cfg) + if err != nil { + return nil, err + } + + stopChan := make(chan struct{}, 1) + readyChan := make(chan struct{}, 1) + out := new(strings.Builder) + errOut := new(strings.Builder) + + // create port forward + forwarder, err := portforward.New( + spdy.NewDialer(upgrader, &http.Client{Transport: transport}, http.MethodPost, req.URL()), + []string{fmt.Sprintf("%d:%d", localPort, remotePort)}, + stopChan, + readyChan, + out, + errOut, + ) + if err != nil { + return nil, err + } + + // launch Port Forward + go func() { + defer GinkgoRecover() + err := forwarder.ForwardPorts() + assert.NoError(t.T(), err) + }() + <-readyChan // wait for port forward to finish + + return stopChan, nil +} + +func CreateCurlPod(t Test, podName, containerName, namespace string) (*corev1.Pod, error) { + // Define the podSpec spec + podSpec := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Namespace: namespace, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: containerName, + Image: "rancher/curl", + Command: []string{"/bin/sh", "-c", "tail -f /dev/null"}, + }, + }, + }, + } + return t.Client().Core().CoreV1().Pods(namespace).Create(t.Ctx(), podSpec, metav1.CreateOptions{}) +} diff --git a/ray-operator/test/support/defaults.go b/ray-operator/test/support/defaults.go index b5ee1ca627a..ccba58919be 100644 --- a/ray-operator/test/support/defaults.go +++ b/ray-operator/test/support/defaults.go @@ -1,6 +1,6 @@ package support const ( - RayVersion = "2.9.0" - RayImage = "rayproject/ray:2.9.0" + RayVersion = "2.40.0" + RayImage = "rayproject/ray:2.40.0" ) diff --git a/ray-operator/test/support/environment.go b/ray-operator/test/support/environment.go index 6c3764bec97..f20ef79e82c 100644 --- a/ray-operator/test/support/environment.go +++ b/ray-operator/test/support/environment.go @@ -1,7 +1,10 @@ package support import ( + "fmt" "os" + "runtime" + "strings" ) const ( @@ 
-20,7 +23,15 @@ func GetRayVersion() string { } func GetRayImage() string { - return lookupEnvOrDefault(KuberayTestRayImage, RayImage) + rayImage := lookupEnvOrDefault(KuberayTestRayImage, RayImage) + // detect if we are running on arm64 machine, most likely apple silicon + // the os name is not checked as it also possible that it might be linux + // also check if the image does not have the `-aarch64` suffix + if runtime.GOARCH == "arm64" && !strings.HasSuffix(rayImage, "-aarch64") { + rayImage = rayImage + "-aarch64" + fmt.Printf("Modified Ray Image to: %s for ARM chips\n", rayImage) + } + return rayImage } func lookupEnvOrDefault(key, value string) string { diff --git a/ray-operator/test/support/events.go b/ray-operator/test/support/events.go index 5bec88dc372..c317fdb41ea 100644 --- a/ray-operator/test/support/events.go +++ b/ray-operator/test/support/events.go @@ -4,7 +4,7 @@ import ( "bytes" "fmt" - "github.com/onsi/gomega" + "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" eventsv1 "k8s.io/api/events/v1" @@ -39,12 +39,12 @@ func storeEvents(t Test, namespace *corev1.Namespace) { t.T().Helper() events, err := t.Client().Core().EventsV1().Events(namespace.Name).List(t.Ctx(), metav1.ListOptions{}) - t.Expect(err).NotTo(gomega.HaveOccurred()) + assert.NoError(t.T(), err) - bytes, err := renderEventContent(eventKeys, mapEventsToKeys(events)) - t.Expect(err).NotTo(gomega.HaveOccurred()) + eventContent, err := renderEventContent(eventKeys, mapEventsToKeys(events)) + assert.NoError(t.T(), err) - WriteToOutputDir(t, eventLogFileName, Log, bytes) + WriteToOutputDir(t, eventLogFileName, Log, eventContent) } func mapEventsToKeys(eventList *eventsv1.EventList) []map[string]string { @@ -89,34 +89,34 @@ func renderEventContent(keys []string, dataMaps []map[string]string) ([]byte, er // Write headers for _, header := range keys { if _, err := content.WriteString(header); err != nil { - return nil, fmt.Errorf("error in writing the header: %v", err) + return 
nil, fmt.Errorf("error in writing the header: %w", err) } if _, err := content.WriteString(getWhitespaceStr(maxStringSizeMap[header] - len(header) + 1)); err != nil { - return nil, fmt.Errorf("error in writing headers: %v", err) + return nil, fmt.Errorf("error in writing headers: %w", err) } if _, err := content.WriteString(" | "); err != nil { - return nil, fmt.Errorf("error in writing headers : %v", err) + return nil, fmt.Errorf("error in writing headers : %w", err) } } if _, err := content.WriteString("\n"); err != nil { - return nil, fmt.Errorf("error in writing headers '|': %v", err) + return nil, fmt.Errorf("error in writing headers '|': %w", err) } // Write events for _, dataMap := range dataMaps { for _, key := range keys { if _, err := content.WriteString(dataMap[key]); err != nil { - return nil, fmt.Errorf("error in writing events: %v", err) + return nil, fmt.Errorf("error in writing events: %w", err) } if _, err := content.WriteString(getWhitespaceStr(maxStringSizeMap[key] - len(dataMap[key]) + 1)); err != nil { - return nil, fmt.Errorf("error in writing events: %v", err) + return nil, fmt.Errorf("error in writing events: %w", err) } if _, err := content.WriteString(" | "); err != nil { - return nil, fmt.Errorf("error in writing events: %v", err) + return nil, fmt.Errorf("error in writing events: %w", err) } } if _, err := content.WriteString("\n"); err != nil { - return nil, fmt.Errorf("error in writing events: %v", err) + return nil, fmt.Errorf("error in writing events: %w", err) } } return content.Bytes(), nil diff --git a/ray-operator/test/support/meta.go b/ray-operator/test/support/meta.go index 7adc7dfabe8..b95a7006dfd 100644 --- a/ray-operator/test/support/meta.go +++ b/ray-operator/test/support/meta.go @@ -6,8 +6,6 @@ type labelSelector string var _ Option[*metav1.ListOptions] = (*labelSelector)(nil) -// nolint: unused -// To be removed when the false-positivity is fixed. 
func (l labelSelector) applyTo(options *metav1.ListOptions) error { options.LabelSelector = string(l) return nil diff --git a/ray-operator/test/support/namespace.go b/ray-operator/test/support/namespace.go index 55c1f9b2f22..1e8741c6a78 100644 --- a/ray-operator/test/support/namespace.go +++ b/ray-operator/test/support/namespace.go @@ -1,7 +1,7 @@ package support import ( - "github.com/onsi/gomega" + "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -20,11 +20,11 @@ func createTestNamespace(t Test, options ...Option[*corev1.Namespace]) *corev1.N } for _, option := range options { - t.Expect(option.applyTo(namespace)).To(gomega.Succeed()) + assert.NoError(t.T(), option.applyTo(namespace)) } namespace, err := t.Client().Core().CoreV1().Namespaces().Create(t.Ctx(), namespace, metav1.CreateOptions{}) - t.Expect(err).NotTo(gomega.HaveOccurred()) + assert.NoError(t.T(), err) return namespace } @@ -35,5 +35,5 @@ func deleteTestNamespace(t Test, namespace *corev1.Namespace) { err := t.Client().Core().CoreV1().Namespaces().Delete(t.Ctx(), namespace.Name, metav1.DeleteOptions{ PropagationPolicy: &propagationPolicy, }) - t.Expect(err).NotTo(gomega.HaveOccurred()) + assert.NoError(t.T(), err) } diff --git a/ray-operator/test/support/ray.go b/ray-operator/test/support/ray.go index 8dcee663e0f..ea75ec5b306 100644 --- a/ray-operator/test/support/ray.go +++ b/ray-operator/test/support/ray.go @@ -1,23 +1,27 @@ package support import ( - "github.com/onsi/gomega" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "errors" + + "github.com/onsi/gomega/format" + "github.com/onsi/gomega/types" + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + "github.com/ray-project/kuberay/ray-operator/controllers/ray/common" ) -func RayJob(t Test, namespace, name string) func(g 
gomega.Gomega) *rayv1.RayJob { - return func(g gomega.Gomega) *rayv1.RayJob { - job, err := t.Client().Ray().RayV1().RayJobs(namespace).Get(t.Ctx(), name, metav1.GetOptions{}) - g.Expect(err).NotTo(gomega.HaveOccurred()) - return job +func RayJob(t Test, namespace, name string) func() (*rayv1.RayJob, error) { + return func() (*rayv1.RayJob, error) { + return GetRayJob(t, namespace, name) } } -func GetRayJob(t Test, namespace, name string) *rayv1.RayJob { - t.T().Helper() - return RayJob(t, namespace, name)(t) +func GetRayJob(t Test, namespace, name string) (*rayv1.RayJob, error) { + return t.Client().Ray().RayV1().RayJobs(namespace).Get(t.Ctx(), name, metav1.GetOptions{}) } func RayJobStatus(job *rayv1.RayJob) rayv1.JobStatus { @@ -28,35 +32,176 @@ func RayJobDeploymentStatus(job *rayv1.RayJob) rayv1.JobDeploymentStatus { return job.Status.JobDeploymentStatus } +func RayJobManagedBy(job *rayv1.RayJob) *string { + return job.Spec.ManagedBy +} + func RayJobReason(job *rayv1.RayJob) rayv1.JobFailedReason { return job.Status.Reason } -func GetRayJobId(t Test, namespace, name string) string { - t.T().Helper() - job := RayJob(t, namespace, name)(t) - return job.Status.JobId +func RayJobFailed(job *rayv1.RayJob) int32 { + if job.Status.Failed == nil { + return 0 + } + return *job.Status.Failed } -func RayCluster(t Test, namespace, name string) func(g gomega.Gomega) *rayv1.RayCluster { - return func(g gomega.Gomega) *rayv1.RayCluster { - cluster, err := t.Client().Ray().RayV1().RayClusters(namespace).Get(t.Ctx(), name, metav1.GetOptions{}) - g.Expect(err).NotTo(gomega.HaveOccurred()) - return cluster +func RayJobSucceeded(job *rayv1.RayJob) int32 { + if job.Status.Succeeded == nil { + return 0 } + return *job.Status.Succeeded +} + +func RayJobClusterName(job *rayv1.RayJob) string { + return job.Status.RayClusterName } -func RayClusterOrError(t Test, namespace, name string) func(g gomega.Gomega) (*rayv1.RayCluster, error) { - return func(g gomega.Gomega) (*rayv1.RayCluster, 
error) { - return t.Client().Ray().RayV1().RayClusters(namespace).Get(t.Ctx(), name, metav1.GetOptions{}) +func RayCluster(t Test, namespace, name string) func() (*rayv1.RayCluster, error) { + return func() (*rayv1.RayCluster, error) { + return GetRayCluster(t, namespace, name) } } -func GetRayCluster(t Test, namespace, name string) *rayv1.RayCluster { - t.T().Helper() - return RayCluster(t, namespace, name)(t) +func GetRayCluster(t Test, namespace, name string) (*rayv1.RayCluster, error) { + return t.Client().Ray().RayV1().RayClusters(namespace).Get(t.Ctx(), name, metav1.GetOptions{}) } func RayClusterState(cluster *rayv1.RayCluster) rayv1.ClusterState { - return cluster.Status.State + return cluster.Status.State //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 +} + +func StatusCondition(condType rayv1.RayClusterConditionType) func(*rayv1.RayCluster) metav1.Condition { + return func(cluster *rayv1.RayCluster) metav1.Condition { + if cluster != nil { + for _, cond := range cluster.Status.Conditions { + if cond.Type == string(condType) { + return cond + } + } + } + return metav1.Condition{} + } +} + +type ConditionMatcher struct { + expected metav1.Condition +} + +func (c *ConditionMatcher) Match(actual interface{}) (success bool, err error) { + if actual == nil { + return false, errors.New(" should be a metav1.Condition but it is nil") + } + a, ok := actual.(metav1.Condition) + if !ok { + return false, errors.New(" should be a metav1.Condition") + } + return a.Reason == c.expected.Reason && a.Status == c.expected.Status, nil +} + +func (c *ConditionMatcher) FailureMessage(actual interface{}) (message string) { + a := actual.(metav1.Condition) + return format.Message(a, "to equal", c.expected) +} + +func (c *ConditionMatcher) NegatedFailureMessage(actual interface{}) (message string) { + a := actual.(metav1.Condition) + return format.Message(a, "not to equal", c.expected) +} + +func MatchCondition(status metav1.ConditionStatus, reason string) 
types.GomegaMatcher { + return &ConditionMatcher{expected: metav1.Condition{Status: status, Reason: reason}} +} + +func RayClusterDesiredWorkerReplicas(cluster *rayv1.RayCluster) int32 { + return cluster.Status.DesiredWorkerReplicas +} + +func HeadPod(t Test, rayCluster *rayv1.RayCluster) func() (*corev1.Pod, error) { + return func() (*corev1.Pod, error) { + return GetHeadPod(t, rayCluster) + } +} + +func GetHeadPod(t Test, rayCluster *rayv1.RayCluster) (*corev1.Pod, error) { + pods, err := t.Client().Core().CoreV1().Pods(rayCluster.Namespace).List( + t.Ctx(), + common.RayClusterHeadPodsAssociationOptions(rayCluster).ToMetaV1ListOptions(), + ) + if err != nil { + return nil, err + } + if len(pods.Items) != 1 { + return nil, errors.New("number of head pods is not 1") + } + return &pods.Items[0], nil +} + +func WorkerPods(t Test, rayCluster *rayv1.RayCluster) func() ([]corev1.Pod, error) { + return func() ([]corev1.Pod, error) { + return GetWorkerPods(t, rayCluster) + } +} + +func GetWorkerPods(t Test, rayCluster *rayv1.RayCluster) ([]corev1.Pod, error) { + pods, err := t.Client().Core().CoreV1().Pods(rayCluster.Namespace).List( + t.Ctx(), + common.RayClusterWorkerPodsAssociationOptions(rayCluster).ToMetaV1ListOptions(), + ) + if pods == nil { + return nil, err + } + return pods.Items, err +} + +func GetAllPods(t Test, rayCluster *rayv1.RayCluster) ([]corev1.Pod, error) { + pods, err := t.Client().Core().CoreV1().Pods(rayCluster.Namespace).List( + t.Ctx(), + common.RayClusterAllPodsAssociationOptions(rayCluster).ToMetaV1ListOptions(), + ) + if pods == nil { + return nil, err + } + return pods.Items, err +} + +func GetGroupPods(t Test, rayCluster *rayv1.RayCluster, group string) []corev1.Pod { + t.T().Helper() + pods, err := t.Client().Core().CoreV1().Pods(rayCluster.Namespace).List( + t.Ctx(), + common.RayClusterGroupPodsAssociationOptions(rayCluster, group).ToMetaV1ListOptions(), + ) + assert.NoError(t.T(), err) + return pods.Items +} + +func 
RayClusterManagedBy(rayCluster *rayv1.RayCluster) *string { + return rayCluster.Spec.ManagedBy +} + +func GetRayService(t Test, namespace, name string) (*rayv1.RayService, error) { + return t.Client().Ray().RayV1().RayServices(namespace).Get(t.Ctx(), name, metav1.GetOptions{}) +} + +func RayService(t Test, namespace, name string) func() (*rayv1.RayService, error) { + return func() (*rayv1.RayService, error) { + return GetRayService(t, namespace, name) + } +} + +func RayServiceStatus(service *rayv1.RayService) rayv1.ServiceStatus { + return service.Status.ServiceStatus +} + +func RayServicesNumEndPoints(service *rayv1.RayService) int32 { + return service.Status.NumServeEndpoints +} + +func GetRayClusterWorkerGroupReplicaSum(cluster *rayv1.RayCluster) int32 { + var replicas int32 + for _, workerGroup := range cluster.Spec.WorkerGroupSpecs { + replicas += *workerGroup.Replicas + } + return replicas } diff --git a/ray-operator/test/support/support.go b/ray-operator/test/support/support.go index 8a657664a23..c8fc9afb928 100644 --- a/ray-operator/test/support/support.go +++ b/ray-operator/test/support/support.go @@ -7,7 +7,6 @@ import ( "github.com/onsi/gomega" "github.com/onsi/gomega/format" - k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -50,10 +49,3 @@ func init() { // Disable object truncation on test results format.MaxLength = 0 } - -func NotFound[T any](fn func(g gomega.Gomega) (T, error)) func(g gomega.Gomega) bool { - return func(g gomega.Gomega) bool { - _, err := fn(g) - return k8serrors.IsNotFound(err) - } -} diff --git a/ray-operator/test/support/test.go b/ray-operator/test/support/test.go index 59900543fd5..564e774de2f 100644 --- a/ray-operator/test/support/test.go +++ b/ray-operator/test/support/test.go @@ -1,17 +1,11 @@ package support import ( - "bufio" "context" "os" "path" "sync" "testing" - "time" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/onsi/gomega" corev1 
"k8s.io/api/core/v1" ) @@ -22,10 +16,7 @@ type Test interface { Client() Client OutputDir() string - gomega.Gomega - NewTestNamespace(...Option[*corev1.Namespace]) *corev1.Namespace - StreamKubeRayOperatorLogs() } type Option[T any] interface { @@ -34,8 +25,6 @@ type Option[T any] interface { type errorOption[T any] func(to T) error -// nolint: unused -// To be removed when the false-positivity is fixed. func (o errorOption[T]) applyTo(to T) error { return o(to) } @@ -52,16 +41,14 @@ func With(t *testing.T) Test { } return &T{ - WithT: gomega.NewWithT(t), - t: t, - ctx: ctx, + t: t, + ctx: ctx, } } type T struct { - *gomega.WithT t *testing.T - // nolint: containedctx + //nolint:containedctx //nolint:nolintlint // TODO: The reason for this lint is unknown ctx context.Context client Client outputDir string @@ -126,36 +113,3 @@ func (t *T) NewTestNamespace(options ...Option[*corev1.Namespace]) *corev1.Names }) return namespace } - -func (t *T) StreamKubeRayOperatorLogs() { - ctx, cancel := context.WithCancel(context.Background()) - t.T().Cleanup(cancel) - // By using `.Pods("")`, we list kuberay-operators from all namespaces - // because they may not always be installed in the "ray-system" namespace. 
- pods, err := t.Client().Core().CoreV1().Pods("").List(ctx, metav1.ListOptions{ - LabelSelector: "app.kubernetes.io/component=kuberay-operator", - }) - t.Expect(err).ShouldNot(gomega.HaveOccurred()) - t.Expect(pods.Items).ShouldNot(gomega.BeEmpty()) - now := metav1.NewTime(time.Now()) - for _, pod := range pods.Items { - go func(pod corev1.Pod, ts *metav1.Time) { - req := t.Client().Core().CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{ - Follow: true, - SinceTime: ts, - }) - stream, err := req.Stream(ctx) - if err != nil { - t.T().Logf("Fail to tail logs from the pod %s/%s", pod.Namespace, pod.Name) - return - } - t.T().Logf("Start tailing logs from the pod %s/%s", pod.Namespace, pod.Name) - defer stream.Close() - scanner := bufio.NewScanner(stream) - for scanner.Scan() { - t.T().Log(scanner.Text()) - } - t.T().Logf("Stop tailing logs from the pod %s/%s: %v", pod.Namespace, pod.Name, scanner.Err()) - }(pod, &now) - } -} diff --git a/ray-operator/test/support/utils.go b/ray-operator/test/support/utils.go index ac39238d610..b24a02ff298 100644 --- a/ray-operator/test/support/utils.go +++ b/ray-operator/test/support/utils.go @@ -5,7 +5,7 @@ import ( "os" "path" - "github.com/onsi/gomega" + "github.com/stretchr/testify/assert" ) func Ptr[T any](v T) *T { @@ -20,6 +20,6 @@ const ( func WriteToOutputDir(t Test, fileName string, fileType OutputType, data []byte) { t.T().Helper() - t.Expect(os.WriteFile(path.Join(t.OutputDir(), fileName+"."+string(fileType)), data, fs.ModePerm)). 
- To(gomega.Succeed()) + err := os.WriteFile(path.Join(t.OutputDir(), fileName+"."+string(fileType)), data, fs.ModePerm) + assert.NoError(t.T(), err) } diff --git a/ray-operator/test/support/yaml.go b/ray-operator/test/support/yaml.go new file mode 100644 index 00000000000..289b96bc507 --- /dev/null +++ b/ray-operator/test/support/yaml.go @@ -0,0 +1,73 @@ +package support + +import ( + "os" + "os/exec" + + "github.com/stretchr/testify/require" + + "k8s.io/apimachinery/pkg/runtime" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + rayscheme "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme" +) + +func deserializeYAML(filename string, into runtime.Object) error { + yamlFileContent, err := os.ReadFile(filename) + if err != nil { + return err + } + decoder := rayscheme.Codecs.UniversalDecoder() + if _, _, err = decoder.Decode(yamlFileContent, nil, into); err != nil { + return err + } + return nil +} + +func DeserializeRayClusterYAML(t Test, filename string) *rayv1.RayCluster { + t.T().Helper() + rayCluster := &rayv1.RayCluster{} + err := deserializeYAML(filename, rayCluster) + require.NoError(t.T(), err, "Fail to deserialize yaml file %s", filename) + return rayCluster +} + +func DeserializeRayJobYAML(t Test, filename string) *rayv1.RayJob { + t.T().Helper() + rayJob := &rayv1.RayJob{} + err := deserializeYAML(filename, rayJob) + require.NoError(t.T(), err, "Fail to deserialize yaml file %s", filename) + return rayJob +} + +func DeserializeRayServiceYAML(t Test, filename string) *rayv1.RayService { + t.T().Helper() + rayService := &rayv1.RayService{} + err := deserializeYAML(filename, rayService) + require.NoError(t.T(), err, "Fail to deserialize yaml file %s", filename) + return rayService +} + +func KubectlApplyYAML(t Test, filename string, namespace string) { + t.T().Helper() + kubectlCmd := exec.CommandContext(t.Ctx(), "kubectl", "apply", "-f", filename, "-n", namespace) + err := kubectlCmd.Run() + 
require.NoError(t.T(), err, "Failed to apply %s to namespace %s", filename, namespace) + t.T().Logf("Successfully applied %s to namespace %s", filename, namespace) +} + +func KubectlApplyQuota(t Test, namespace, quota string) { + t.T().Helper() + kubectlCmd := exec.CommandContext(t.Ctx(), "kubectl", "create", "quota", namespace, "-n", namespace, quota) + err := kubectlCmd.Run() + require.NoError(t.T(), err, "Failed to apply quota %s in %s", quota, namespace) + t.T().Logf("Successfully applied quota %s in %s", quota, namespace) +} + +func KubectlDeleteAllPods(t Test, namespace string) { + t.T().Helper() + kubectlCmd := exec.CommandContext(t.Ctx(), "kubectl", "delete", "--all", "pods", "-n", namespace) + err := kubectlCmd.Run() + require.NoError(t.T(), err, "Failed to delete pods in %s", namespace) + t.T().Logf("Successfully delete pods in %s", namespace) +} diff --git a/ray-operator/test/utils/string_conversion_test.go b/ray-operator/test/utils/string_conversion_test.go new file mode 100644 index 00000000000..1022a268910 --- /dev/null +++ b/ray-operator/test/utils/string_conversion_test.go @@ -0,0 +1,26 @@ +package strconv + +import ( + "testing" + "unsafe" + + "github.com/onsi/gomega" + + utils "github.com/ray-project/kuberay/ray-operator/pkg/utils" +) + +func TestStringConversion(t *testing.T) { + g := gomega.NewWithT(t) + + str := "hello world" + + // Test string to byte array conversion. + arr := utils.ConvertStringToByteSlice(str) + g.Expect(arr).Should(gomega.Equal([]byte(str))) + g.Expect(&arr[0]).Should(gomega.Equal(unsafe.StringData(str))) + + // Test byte array to string conversion. 
+ convStr := utils.ConvertByteSliceToString(arr) + g.Expect(str).Should(gomega.Equal(convStr)) + g.Expect(unsafe.StringData(convStr)).Should(gomega.Equal(&arr[0])) +} diff --git a/scripts/changelog-generator.py b/scripts/changelog-generator.py index 58bec2bd545..1befc92c648 100644 --- a/scripts/changelog-generator.py +++ b/scripts/changelog-generator.py @@ -36,4 +36,3 @@ def generate(self, pr_id): for pr_match in re.finditer(r"#(\d+)", payload): pr_id = int(pr_match.group(1)) print("* {}".format(g.generate(pr_id))) - diff --git a/scripts/openapi2jsonschema.py b/scripts/openapi2jsonschema.py new file mode 100755 index 00000000000..845b297c64f --- /dev/null +++ b/scripts/openapi2jsonschema.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 + +# This script is directly derived from: +# Source: https://github.com/yannh/kubeconform/blob/master/scripts/openapi2jsonschema.py + +# Derived from https://github.com/instrumenta/openapi2jsonschema +import yaml +import json +import sys +import os +import urllib.request +if 'DISABLE_SSL_CERT_VALIDATION' in os.environ: + import ssl + ssl._create_default_https_context = ssl._create_unverified_context + +def test_additional_properties(): + for test in iter([{ + "input": {"something": {"properties": {}}}, + "expect": {'something': {'properties': {}, "additionalProperties": False}} + },{ + "input": {"something": {"somethingelse": {}}}, + "expect": {'something': {'somethingelse': {}}} + }]): + assert additional_properties(test["input"]) == test["expect"] + +def additional_properties(data, skip=False): + "This recreates the behaviour of kubectl at https://github.com/kubernetes/kubernetes/blob/225b9119d6a8f03fcbe3cc3d590c261965d928d0/pkg/kubectl/validation/schema.go#L312" + if isinstance(data, dict): + if "properties" in data and not skip: + if "additionalProperties" not in data: + data["additionalProperties"] = False + for _, v in data.items(): + additional_properties(v) + return data + +def test_replace_int_or_string(): + for test in iter([{ 
+ "input": {"something": {"format": "int-or-string"}}, + "expect": {'something': {'oneOf': [{'type': 'string'}, {'type': 'integer'}]}} + },{ + "input": {"something": {"format": "string"}}, + "expect": {"something": {"format": "string"}}, + }]): + assert replace_int_or_string(test["input"]) == test["expect"] + +def replace_int_or_string(data): + new = {} + try: + for k, v in iter(data.items()): + new_v = v + if isinstance(v, dict): + if "format" in v and v["format"] == "int-or-string": + new_v = {"oneOf": [{"type": "string"}, {"type": "integer"}]} + else: + new_v = replace_int_or_string(v) + elif isinstance(v, list): + new_v = list() + for x in v: + new_v.append(replace_int_or_string(x)) + else: + new_v = v + new[k] = new_v + return new + except AttributeError: + return data + +def allow_null_optional_fields(data, parent=None, grand_parent=None, key=None): + new = {} + try: + for k, v in iter(data.items()): + new_v = v + if isinstance(v, dict): + new_v = allow_null_optional_fields(v, data, parent, k) + elif isinstance(v, list): + new_v = list() + for x in v: + new_v.append(allow_null_optional_fields(x, v, parent, k)) + elif isinstance(v, str): + is_non_null_type = k == "type" and v != "null" + has_required_fields = grand_parent and "required" in grand_parent + if is_non_null_type and not has_required_fields: + new_v = [v, "null"] + new[k] = new_v + return new + except AttributeError: + return data + + +def append_no_duplicates(obj, key, value): + """ + Given a dictionary, lookup the given key, if it doesn't exist create a new array. + Then check if the given value already exists in the array, if it doesn't add it. 
+ """ + if key not in obj: + obj[key] = [] + if value not in obj[key]: + obj[key].append(value) + + +def write_schema_file(schema, filename): + schemaJSON = "" + + schema = additional_properties(schema, skip=not os.getenv("DENY_ROOT_ADDITIONAL_PROPERTIES")) + schema = replace_int_or_string(schema) + schemaJSON = json.dumps(schema, indent=2) + + # Dealing with user input here.. + filename = os.path.basename(filename) + f = open(filename, "w") + print(schemaJSON, file=f) + f.close() + print("JSON schema written to {filename}".format(filename=filename)) + + +def construct_value(load, node): + # Handle nodes that start with '=' + # See https://github.com/yaml/pyyaml/issues/89 + if not isinstance(node, yaml.ScalarNode): + raise yaml.constructor.ConstructorError( + "while constructing a value", + node.start_mark, + "expected a scalar, but found %s" % node.id, node.start_mark + ) + yield str(node.value) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print('Missing FILE parameter.\nUsage: %s [FILE]' % sys.argv[0]) + exit(1) + + for crdFile in sys.argv[1:]: + if crdFile.startswith("http"): + f = urllib.request.urlopen(crdFile) + else: + f = open(crdFile) + with f: + defs = [] + yaml.SafeLoader.add_constructor(u'tag:yaml.org,2002:value', construct_value) + for y in yaml.load_all(f, Loader=yaml.SafeLoader): + if y is None: + continue + if "items" in y: + defs.extend(y["items"]) + if "kind" not in y: + continue + if y["kind"] != "CustomResourceDefinition": + continue + else: + defs.append(y) + + for y in defs: + filename_format = os.getenv("FILENAME_FORMAT", "{kind}_{version}") + filename = "" + if "spec" in y and "versions" in y["spec"] and y["spec"]["versions"]: + for version in y["spec"]["versions"]: + if "schema" in version and "openAPIV3Schema" in version["schema"]: + filename = filename_format.format( + kind=y["spec"]["names"]["kind"], + group=y["spec"]["group"].split(".")[0], + fullgroup=y["spec"]["group"], + version=version["name"], + ).lower() + ".json" + + 
schema = version["schema"]["openAPIV3Schema"] + write_schema_file(schema, filename) + elif "validation" in y["spec"] and "openAPIV3Schema" in y["spec"]["validation"]: + filename = filename_format.format( + kind=y["spec"]["names"]["kind"], + group=y["spec"]["group"].split(".")[0], + fullgroup=y["spec"]["group"], + version=version["name"], + ).lower() + ".json" + + schema = y["spec"]["validation"]["openAPIV3Schema"] + write_schema_file(schema, filename) + elif "spec" in y and "validation" in y["spec"] and "openAPIV3Schema" in y["spec"]["validation"]: + filename = filename_format.format( + kind=y["spec"]["names"]["kind"], + group=y["spec"]["group"].split(".")[0], + fullgroup=y["spec"]["group"], + version=y["spec"]["version"], + ).lower() + ".json" + + schema = y["spec"]["validation"]["openAPIV3Schema"] + write_schema_file(schema, filename) + + exit(0) diff --git a/scripts/rbac-check.py b/scripts/rbac-check.py index 1b876ddee50..f9d33df536c 100644 --- a/scripts/rbac-check.py +++ b/scripts/rbac-check.py @@ -1,6 +1,6 @@ import os import sys -from yaml import load, CLoader as Loader +from yaml import load, CSafeLoader from deepdiff import DeepDiff def compare_two_yaml(yaml1, yaml2): @@ -19,12 +19,12 @@ def compare_two_yaml(yaml1, yaml2): diff_files = [] for f in files: - yaml1 = load(open(helm_rbac_dir + f, 'r'), Loader=Loader) - yaml2 = load(open(kustomize_rbac_dir + f, 'r'), Loader=Loader) + yaml1 = load(open(helm_rbac_dir + f, 'r'), Loader=CSafeLoader) + yaml2 = load(open(kustomize_rbac_dir + f, 'r'), Loader=CSafeLoader) if not compare_two_yaml(yaml1, yaml2): diff_files.append(f) if diff_files: - sys.exit(f"{diff_files} are out of synchronization! RBAC YAML files in" + - "\'helm-chart/kuberay-operator/templates\' and \'ray-operator/config/rbac\'" + + sys.exit(f"{diff_files} are out of synchronization! RBAC YAML files in" + + "\'helm-chart/kuberay-operator/templates\' and \'ray-operator/config/rbac\'" + "should be synchronized manually. 
See DEVELOPMENT.md for more details.") diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 391fe3b892c..97318c1d7cd 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -2,4 +2,4 @@ PyYAML==6.0 deepdiff==5.8.1 PyGithub==1.57 pytest==7.0.1 -jsonpatch==1.32 \ No newline at end of file +jsonpatch==1.32 diff --git a/scripts/validate-helm.sh b/scripts/validate-helm.sh new file mode 100644 index 00000000000..7fd1b1410f8 --- /dev/null +++ b/scripts/validate-helm.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -euo pipefail +export KUBERAY_HOME=$(git rev-parse --show-toplevel) +SCRIPT_PATH="${KUBERAY_HOME}/scripts/openapi2jsonschema.py" +RAYCLUSTER_CRD_PATH="$KUBERAY_HOME/ray-operator/config/crd/bases/ray.io_rayclusters.yaml" +tmp=$(mktemp -d) +trap 'rm -rf "$tmp"' EXIT + +# Convert CRD YAML to JSON Schema +pushd "${tmp}" > /dev/null +"$SCRIPT_PATH" "$RAYCLUSTER_CRD_PATH" +popd > /dev/null +RAYCLUSTER_CRD_SCHEMA="${tmp}/raycluster_v1.json" + +# Validate Helm charts with kubeconform +echo "Validating Helm Charts with kubeconform..." 
+helm template "$KUBERAY_HOME/helm-chart/kuberay-apiserver" | kubeconform --summary -schema-location default +helm template "$KUBERAY_HOME/helm-chart/kuberay-operator" | kubeconform --summary -schema-location default +helm template "$KUBERAY_HOME/helm-chart/ray-cluster" | kubeconform --summary -schema-location default -schema-location "$RAYCLUSTER_CRD_SCHEMA" diff --git a/tests/config/ray-cluster.ray-ft.yaml.template b/tests/config/ray-cluster.ray-ft.yaml.template index a6e42de6a01..87019f72f54 100644 --- a/tests/config/ray-cluster.ray-ft.yaml.template +++ b/tests/config/ray-cluster.ray-ft.yaml.template @@ -65,18 +65,16 @@ spec: apiVersion: ray.io/v1 kind: RayCluster metadata: - labels: - controller-tools.k8s.io: "1.0" - annotations: - ray.io/ft-enabled: "true" # enable Ray GCS FT name: raycluster-external-redis spec: rayVersion: '$ray_version' + gcsFaultToleranceOptions: + redisAddress: "redis:6379" + redisPassword: + value: "5241590000000000" headGroupSpec: rayStartParams: - dashboard-host: "0.0.0.0" num-cpus: "0" - redis-password: "5241590000000000" #pod template template: spec: @@ -84,9 +82,6 @@ spec: - name: ray-head image: $ray_image env: - # RAY_REDIS_ADDRESS can force ray to use external redis - - name: RAY_REDIS_ADDRESS - value: redis:6379 - name: RAY_gcs_rpc_server_reconnect_timeout_s value: "20" ports: @@ -150,7 +145,7 @@ data: # 0: Wait until only 1 head Pod is alive print("Wait until only 1 head Pod is alive", flush=True) - for i in range(90): + for i in range(90): nodes = list_nodes() num_alive_heads = 0 for node in nodes: diff --git a/tests/framework/config/requirements.txt b/tests/framework/config/requirements.txt index 56825b0fb62..50724ab0a3f 100644 --- a/tests/framework/config/requirements.txt +++ b/tests/framework/config/requirements.txt @@ -2,4 +2,4 @@ docker GitPython kubernetes jsonpatch -pytest \ No newline at end of file +pytest diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 2e9cb53c114..61c375203d0 100644 
--- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -1,9 +1,9 @@ """Configuration test framework for KubeRay""" import json -import jsonpatch -from typing import Dict, List, Optional -import unittest import time +import unittest +from typing import Dict, List, Optional +import jsonpatch from framework.utils import ( create_custom_object, @@ -306,17 +306,20 @@ def assert_rule(self, custom_resource, cr_namespace): name=custom_resource["metadata"]["name"], namespace=cr_namespace, path=query.get("path").rstrip("/"), - json=json.dumps(query["json_args"]), + json=json.dumps(query["json_args"]) ) + if self.start_in_background: - shell_subprocess_run(f"{cmd} &", hide_output=True) + shell_subprocess_run(f"{cmd} &", hide_output=False) else: output = shell_subprocess_check_output(cmd) + logger.info("curl output: %s", output.decode('utf-8')) if hasattr(query.get("expected_output"), "__iter__"): assert output.decode('utf-8') in query["expected_output"] else: assert output.decode('utf-8') == query["expected_output"] + time.sleep(1) class AutoscaleRule(Rule): def __init__( diff --git a/tests/framework/utils.py b/tests/framework/utils.py index 1eefefc29d9..2196867dfde 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -78,7 +78,7 @@ def upload_image(): @abstractmethod def check_cluster_exist(self) -> bool: pass - + @classmethod def instance(cls): if cls.EXTERNAL_CLUSTER in os.environ: @@ -92,7 +92,7 @@ class ExternalClusterManager(ClusterManager): def __init__(self) -> None: self.k8s_client_dict = {} self.cleanup_timeout = 120 - + def cleanup(self, namespace = "default") -> None: if self.CLUSTER_CLEANUP_SCRIPT in os.environ: cleanup_script = os.environ[self.CLUSTER_CLEANUP_SCRIPT] @@ -121,7 +121,7 @@ def cleanup(self, namespace = "default") -> None: break time.sleep(1) - + for _, k8s_client in self.k8s_client_dict.items(): k8s_client.api_client.rest_client.pool_manager.clear() k8s_client.api_client.close() @@ -148,7 +148,7 @@ def 
check_cluster_exist(self) -> bool: ) == 0 ) - + def __delete_all_crs(self, group, version, namespace, plural): custom_objects_api = self.k8s_client_dict[CONST.K8S_CR_CLIENT_KEY] try: @@ -204,7 +204,7 @@ def check_cluster_exist(self) -> bool: ) == 0 ) - + def _adjust_kubeconfig_server_address(self) -> None: """Modify the server address in kubeconfig to https://docker:6443""" if os.getenv(CONST.BUILDKITE_ENV, default="") == "true": @@ -341,7 +341,7 @@ def prepare_operator(self): return_code = shell_subprocess_run(self.installation_script) if return_code != 0: raise Exception("Operator installation failed with exit code " + str(return_code)) - + def shell_subprocess_run(command, check=True, hide_output=False) -> int: """Command will be executed through the shell. @@ -420,7 +420,7 @@ def delete_all_cr(crd_name, namespace, check=True): def start_curl_pod(name: str, namespace: str, timeout_s: int = -1): shell_subprocess_run( - f"kubectl run {name} --image=radial/busyboxplus:curl -n {namespace} " + f"kubectl run {name} --image=rancher/curl -n {namespace} " '--command -- /bin/sh -c "while true; do sleep 10;done"' ) diff --git a/tests/test_sample_raycluster_yamls.py b/tests/test_sample_raycluster_yamls.py deleted file mode 100644 index a45bb3a74fe..00000000000 --- a/tests/test_sample_raycluster_yamls.py +++ /dev/null @@ -1,89 +0,0 @@ -''' Test sample RayCluster YAML files to catch invalid and outdated ones. 
''' -import logging -import unittest -import os -import git -import yaml -import argparse - -from framework.prototype import ( - RuleSet, - GeneralTestCase, - RayClusterAddCREvent, - HeadPodNameRule, - EasyJobRule, - HeadSvcRule, -) - -from framework.utils import ( - CONST -) - -logger = logging.getLogger(__name__) - -def parse_args(): - parser = argparse.ArgumentParser(description='Run tests for specified YAML files.') - parser.add_argument('--yaml-files', nargs='*', help='Use the filename under path `ray-operator/config/samples` to specify which YAML files should be tested.') - return parser.parse_args() - -if __name__ == '__main__': - NAMESPACE = 'default' - SAMPLE_PATH = CONST.REPO_ROOT.joinpath("ray-operator/config/samples/") - - sample_yaml_files = [] - - # Paths of untracked files, specified as strings, relative to KubeRay - # git root directory. - untracked_files = set( - git.Repo(CONST.REPO_ROOT).untracked_files - ) - - args = parse_args() - - for file in os.scandir(SAMPLE_PATH): - if not file.is_file(): - continue - # For local development, skip untracked files. 
- if os.path.relpath(file.path, CONST.REPO_ROOT) in untracked_files: - continue - # Skip files that don't match the specified YAML files - if args.yaml_files and file.name not in args.yaml_files: - continue - - with open(file, encoding="utf-8") as cr_yaml: - for k8s_object in yaml.safe_load_all(cr_yaml): - if k8s_object['kind'] == 'RayCluster': - sample_yaml_files.append( - {'path': file.path, 'name': file.name, 'cr': k8s_object} - ) - break - - skip_tests = { - 'ray-cluster.complete.large.yaml': 'Skip this test because it requires a lot of resources.', - 'ray-cluster.autoscaler.large.yaml': - 'Skip this test because it requires a lot of resources.', - 'ray-cluster.tpu-v4-singlehost.yaml': 'Skip this test because it requires TPU resources.', - 'ray-cluster.tpu-v4-multihost.yaml' : 'Skip this test because it requires TPU resources', - 'ray-cluster.gke-bucket.yaml': 'Skip this test because it requires GKE and k8s service accounts.', - 'ray-service.high-availability-locust.yaml': 'Skip this test because the RayCluster here is only used for testing RayService.', - } - - rs = RuleSet([HeadPodNameRule(), EasyJobRule(), HeadSvcRule()]) - - # Build a test plan - logger.info("Build a test plan ...") - test_cases = unittest.TestSuite() - for index, new_cr in enumerate(sample_yaml_files): - if new_cr['name'] in skip_tests: - logger.info('[SKIP TEST %d] %s: %s', index, new_cr['name'], skip_tests[new_cr['name']]) - continue - logger.info('[TEST %d]: %s', index, new_cr['name']) - addEvent = RayClusterAddCREvent(new_cr['cr'], [rs], 90, NAMESPACE, new_cr['path']) - test_cases.addTest(GeneralTestCase('runtest', addEvent)) - - # Execute all tests - runner = unittest.TextTestRunner() - test_result = runner.run(test_cases) - - # Without this line, the exit code will always be 0. 
- assert test_result.wasSuccessful() diff --git a/tests/test_sample_rayjob_yamls.py b/tests/test_sample_rayjob_yamls.py deleted file mode 100644 index 4b52a7c36fb..00000000000 --- a/tests/test_sample_rayjob_yamls.py +++ /dev/null @@ -1,54 +0,0 @@ -''' Test sample RayJob YAML files to catch invalid and outdated ones. ''' -import unittest -import os -import logging -import yaml - -from framework.prototype import ( - RuleSet, - GeneralTestCase, - RayJobAddCREvent, - EasyJobRule, - ShutdownJobRule, -) - -from framework.utils import ( - CONST -) - -logger = logging.getLogger(__name__) - -if __name__ == '__main__': - NAMESPACE = 'default' - SAMPLE_PATH = CONST.REPO_ROOT.joinpath("ray-operator/config/samples/") - YAMLs = ['ray-job.sample.yaml', 'ray-job.shutdown.yaml', 'ray-job.custom-head-svc.yaml', 'ray-job.resources.yaml'] - - sample_yaml_files = [] - for filename in YAMLs: - filepath = SAMPLE_PATH.joinpath(filename) - with open(filepath, encoding="utf-8") as cr_yaml: - for k8s_object in yaml.safe_load_all(cr_yaml): - if k8s_object['kind'] == 'RayJob': - sample_yaml_files.append( - {'path': filepath, 'name': filename, 'cr': k8s_object} - ) - break - # NOTE: The Ray Job "SUCCEEDED" status is checked in the `RayJobAddCREvent` itself. - # (The event is not considered "converged" until the job has succeeded.) The EasyJobRule - # is only used to additionally check that the Ray Cluster remains alive and functional. - rs = RuleSet([EasyJobRule(), ShutdownJobRule()]) - - # Build a test plan - logger.info("Building a test plan ...") - test_cases = unittest.TestSuite() - for index, new_cr in enumerate(sample_yaml_files): - logger.info('[TEST %d]: %s', index, new_cr['name']) - addEvent = RayJobAddCREvent(new_cr['cr'], [rs], 300, NAMESPACE, new_cr['path']) - test_cases.addTest(GeneralTestCase('runtest', addEvent)) - - # Execute all testsCRs - runner = unittest.TextTestRunner() - test_result = runner.run(test_cases) - - # Without this line, the exit code will always be 0. 
- assert test_result.wasSuccessful() diff --git a/tests/test_sample_rayservice_yamls.py b/tests/test_sample_rayservice_yamls.py deleted file mode 100644 index e99f4711b7c..00000000000 --- a/tests/test_sample_rayservice_yamls.py +++ /dev/null @@ -1,355 +0,0 @@ -''' Test sample RayService YAML files to catch invalid and outdated ones. ''' -from copy import deepcopy -from kubernetes import client -import logging -import pytest -import sys -from tempfile import NamedTemporaryFile -import time -from typing import Any, Dict, List, Optional -import yaml - -from framework.prototype import ( - RuleSet, - CREvent, - EasyJobRule, - CurlServiceRule, - AutoscaleRule, - get_expected_head_pods, - get_expected_worker_pods, - show_cluster_info, - check_pod_running, -) - -from framework.utils import ( - get_custom_object, - start_curl_pod, - logger, - shell_subprocess_run, - CONST, - K8S_CLUSTER_MANAGER, - OperatorManager -) - -logger = logging.getLogger(__name__) - -NAMESPACE = 'default' - -class RayServiceAddCREvent(CREvent): - """CREvent for RayService addition""" - - def exec(self): - shell_subprocess_run(f"kubectl apply -n {self.namespace} -f {self.filepath}") - - def wait(self): - """Wait for RayService to converge - - Wait until: - (1) serviceStatus is "Running": This means serve applications in RayCluster are ready to serve incoming traffic. - (2) numServeEndpoints > 0: This means the k8s serve service is ready to redirect traffic to the RayCluster. 
- """ - - logger.info("Waiting for pods in ray service to be running...") - start_time = time.time() - - while time.time() - start_time < self.timeout: - rayservice = get_custom_object(CONST.RAY_SERVICE_CRD, self.namespace, - self.custom_resource_object["metadata"]["name"]) - status = rayservice.get("status", {}) - if status.get("serviceStatus") == "Running" and status.get("numServeEndpoints", 0) > 0: - logger.info("--- RayServiceAddCREvent %s seconds ---", time.time() - start_time) - return - time.sleep(1) - - logger.info( - f"RayServiceAddCREvent wait() failed to converge in {self.timeout}s." - f"expected serviceStatus: Running, got {status.get('serviceStatus')}" - f"expected numServeEndpoints > 0, got {status.get('numServeEndpoints')}" - ) - show_cluster_info(self.namespace) - raise TimeoutError(f"RayServiceAddCREvent didn't finish in {self.timeout}s") - -class RayServiceUpdateCREvent(CREvent): - """CREvent for RayService update""" - - def __init__( - self, - custom_resource_object, - rulesets: List[RuleSet] = [], - timeout: int = 180, - namespace: str = "default", - filepath: Optional[str] = None, - switch_cluster: bool = False, - query_while_updating: Optional[Dict[str, str]] = None, - ): - super().__init__(custom_resource_object, rulesets, timeout, namespace, filepath) - self.name = self.custom_resource_object["metadata"]["name"] - self.query_rule = None - self.switch_cluster = switch_cluster - if query_while_updating: - self.query_rule = CurlServiceRule(queries=query_while_updating) - - def get_active_ray_cluster_name(self): - rayservice = get_custom_object(CONST.RAY_SERVICE_CRD, self.namespace, self.name) - return rayservice["status"]["activeServiceStatus"]["rayClusterName"] - - def exec(self): - """Update a CR by a `kubectl apply` command.""" - - self.old_cluster_name = self.get_active_ray_cluster_name() - self.start = time.time() - shell_subprocess_run(f"kubectl apply -n {self.namespace} -f {self.filepath}") - - def wait_for_service_status(self, 
service_status: str): - """Helper function to check for service status.""" - - while time.time() - self.start < self.timeout: - rayservice = get_custom_object(CONST.RAY_SERVICE_CRD, self.namespace, self.name) - status = rayservice.get("status", {}) - if status.get("serviceStatus") == service_status and status.get("numServeEndpoints", 0) > 0: - return - if self.query_rule: - self.query_rule.assert_rule(self.custom_resource_object, self.namespace) - time.sleep(0.1) - else: - raise TimeoutError( - f"RayServiceUpdateCREvent wait() failed to converge in {self.timeout}s." - f"expected serviceStatus: {service_status}, got {status.get('serviceStatus')}" - f"expected numServeEndpoints > 0, got {status.get('numServeEndpoints')}" - ) - - def wait(self): - """Wait for deployment to transition -> WaitForServeDeploymentReady -> Running""" - - self.wait_for_service_status("WaitForServeDeploymentReady") - logger.info("Ray service transitioned to status WaitForServeDeploymentReady.") - self.wait_for_service_status("Running") - logger.info("Ray service transitioned to status Running.") - - if self.switch_cluster: - new_cluster_name = self.get_active_ray_cluster_name() - assert new_cluster_name != self.old_cluster_name - - # The old RayCluster will continue to exist for a while to allow the k8s service - # enough time to fully redirect traffic to the new RayCluster. During this period, - # queries might still be processed by either the old or the new RayCluster. 
- custom_api = K8S_CLUSTER_MANAGER.k8s_client_dict[CONST.K8S_CR_CLIENT_KEY] - while time.time() - self.start < self.timeout: - rayclusters = custom_api.list_namespaced_custom_object( - group = 'ray.io', version = 'v1', namespace = self.namespace, - plural = 'rayclusters') - if len(rayclusters["items"]) == 1 and rayclusters["items"][0]["metadata"]["name"] == new_cluster_name: - logger.info(f'Ray service has fully moved to cluster "{new_cluster_name}"') - return - self.query_rule.assert_rule(self.custom_resource_object, self.namespace) - - - -class RayServiceDeleteCREvent(CREvent): - """CREvent for RayService deletion""" - def exec(self): - """Delete a CR by a `kubectl delete` command.""" - shell_subprocess_run(f"kubectl delete -n {self.namespace} -f {self.filepath}") - - def wait(self): - """Wait for pods to be deleted""" - custom_api = K8S_CLUSTER_MANAGER.k8s_client_dict[CONST.K8S_CR_CLIENT_KEY] - start_time = time.time() - while time.time() - start_time < self.timeout: - rayservices = custom_api.list_namespaced_custom_object( - group = 'ray.io', version = 'v1', namespace = self.namespace, - plural = 'rayservices') - rayclusters = custom_api.list_namespaced_custom_object( - group = 'ray.io', version = 'v1', namespace = self.namespace, - plural = 'rayclusters') - - if (len(rayservices["items"]) == 0 and len(rayclusters["items"]) == 0): - logger.info("--- Cleanup RayService %s seconds ---", time.time() - start_time) - return - time.sleep(1) - - logger.info(f"RayServiceDeleteCREvent failed to converge in {self.timeout}s.") - show_cluster_info(self.namespace) - raise TimeoutError(f"RayServiceDeleteCREvent didn't finish in {self.timeout}s.") - - -class TestRayService: - sample_path = CONST.REPO_ROOT.joinpath("ray-operator/config/samples/").joinpath('ray-service.sample.yaml') - - @pytest.fixture - def set_up_cluster(self): - with open(self.sample_path, encoding="utf-8") as cr_yaml: - self.cr = yaml.safe_load(cr_yaml) - - self.default_queries = [ - {"path": "/fruit", 
"json_args": ["MANGO", 2], "expected_output": "6"}, - {"path": "/calc", "json_args": ["MUL", 3], "expected_output": "15 pizzas please!"}, - ] - - K8S_CLUSTER_MANAGER.cleanup() - K8S_CLUSTER_MANAGER.initialize_cluster() - operator_manager = OperatorManager.instance() - operator_manager.prepare_operator() - start_curl_pod("curl", "default") - - yield - - K8S_CLUSTER_MANAGER.cleanup() - - def test_deploy_applications(self, set_up_cluster): - rs = RuleSet([EasyJobRule(), CurlServiceRule(queries=self.default_queries)]) - cr_events: List[CREvent] = [ - RayServiceAddCREvent(self.cr, [rs], 90, NAMESPACE, self.sample_path), - RayServiceDeleteCREvent(self.cr, [], 90, NAMESPACE, self.sample_path) - ] - - for cr_event in cr_events: - cr_event.trigger() - - def test_in_place_update(self, set_up_cluster): - # Modify the MangoStand price and Multiplier factor - updated_cr = deepcopy(self.cr) - config = yaml.safe_load(self.cr["spec"]["serveConfigV2"]) - config["applications"][0]["deployments"][0]["user_config"]["price"] = 4 - config["applications"][1]["deployments"][1]["user_config"]["factor"] = 3 - updated_cr["spec"]["serveConfigV2"] = yaml.safe_dump(config) - - updated_queries = [ - {"path": "/fruit", "json_args": ["MANGO", 2], "expected_output": "8"}, - {"path": "/calc", "json_args": ["MUL", 3], "expected_output": "9 pizzas please!"}, - ] - - with NamedTemporaryFile(mode="w+", suffix=".yaml") as yaml_copy: - logger.info(f"Writing modified RayService yaml to {yaml_copy.name}.") - yaml_copy.writelines(yaml.safe_dump(updated_cr)) - yaml_copy.flush() - - cr_events: List[CREvent] = [ - RayServiceAddCREvent( - custom_resource_object=self.cr, - rulesets=[RuleSet([EasyJobRule(), CurlServiceRule(queries=self.default_queries)])], - timeout=90, - namespace=NAMESPACE, - filepath=self.sample_path - ), - RayServiceUpdateCREvent( - custom_resource_object=self.cr, - rulesets=[RuleSet([EasyJobRule(), CurlServiceRule(queries=updated_queries)])], - timeout=90, - namespace=NAMESPACE, - 
filepath=yaml_copy.name - ), - RayServiceDeleteCREvent(self.cr, [], 90, NAMESPACE, self.sample_path), - ] - - for cr_event in cr_events: - cr_event.trigger() - - def test_zero_downtime_rollout(self, set_up_cluster): - # Modify the cluster spec to trigger a rollout - updated_cr = deepcopy(self.cr) - - config = yaml.safe_load(self.cr["spec"]["serveConfigV2"]) - config["applications"][0]["deployments"][0]["user_config"]["price"] = 4 - config["applications"][1]["deployments"][1]["user_config"]["factor"] = 3 - updated_cr["spec"]["serveConfigV2"] = yaml.safe_dump(config) - - env = [{"name": "SAMPLE_ENV_VAR", "value": "SAMPLE_VALUE"}] - updated_cr["spec"]["rayClusterConfig"]["headGroupSpec"]["template"]["spec"]["containers"][0]["env"] = env - - updated_queries = [ - {"path": "/fruit", "json_args": ["MANGO", 2], "expected_output": "8"}, - {"path": "/calc", "json_args": ["MUL", 3], "expected_output": "9 pizzas please!"}, - ] - allowed_queries_during_update = deepcopy(self.default_queries) - allowed_queries_during_update[0]["expected_output"] = {"6", "8"} - allowed_queries_during_update[1]["expected_output"] = {"15 pizzas please!", "9 pizzas please!"} - - with NamedTemporaryFile(mode="w+", suffix=".yaml") as yaml_copy: - logger.info(f"Writing modified RayService yaml to {yaml_copy.name}.") - yaml_copy.writelines(yaml.safe_dump(updated_cr)) - yaml_copy.flush() - - cr_events: List[CREvent] = [ - RayServiceAddCREvent( - custom_resource_object=self.cr, - rulesets=[RuleSet([EasyJobRule(), CurlServiceRule(queries=self.default_queries)])], - filepath=self.sample_path - ), - RayServiceUpdateCREvent( - custom_resource_object=self.cr, - rulesets=[RuleSet([CurlServiceRule(queries=updated_queries)])], - filepath=yaml_copy.name, - switch_cluster=True, - query_while_updating=allowed_queries_during_update, - ), - RayServiceDeleteCREvent(custom_resource_object=self.cr, filepath=self.sample_path), - ] - - for cr_event in cr_events: - cr_event.trigger() - -class TestRayServiceAutoscaling: - 
"""Test RayService autoscaling""" - @pytest.fixture - def set_up_cluster(self): - """Set up a K8s cluster, deploy the KubeRay operator, and start a curl Pod""" - K8S_CLUSTER_MANAGER.cleanup() - K8S_CLUSTER_MANAGER.initialize_cluster() - operator_manager = OperatorManager.instance() - operator_manager.prepare_operator() - start_curl_pod("curl", "default") - - yield - - K8S_CLUSTER_MANAGER.cleanup() - - def test_service_autoscaling(self, set_up_cluster): - """This test uses a special workload that can allow us to - reliably test autoscaling. - - The workload consists of two applications. The first application - checks on an event in the second application. If the event isn't - set, the first application will block on requests until the - event is set. So, first we send a bunch of requests to the first - application, which will trigger Serve autoscaling to bring up - more replicas since the existing replicas are blocked on - requests. Worker pods should scale up. Then we set the event in - the second application, releasing all blocked requests. Worker - pods should scale down. - """ - dir_path = "ray-operator/config/samples/" - cr_yaml_path = CONST.REPO_ROOT.joinpath(dir_path).joinpath("ray-service.autoscaler.yaml") - with open(cr_yaml_path, encoding="utf-8") as cr_yaml: - cr = yaml.safe_load(cr_yaml) - - scale_up_rule = AutoscaleRule( - query={"path": "/", "json_args": {}}, - num_repeat=20, - expected_worker_pods=5, - timeout=30, - message="Sending a lot of requests. Worker pods should start scaling up..." - ) - scale_down_rule = AutoscaleRule( - query={"path": "/signal", "json_args": {}}, - num_repeat=1, - expected_worker_pods=0, - timeout=400, - message="Releasing all blocked requests. Worker pods should start scaling down..." 
- ) - cr_events: List[CREvent] = [ - RayServiceAddCREvent( - custom_resource_object=cr, - rulesets=[RuleSet([scale_up_rule, scale_down_rule])], - timeout=120, - namespace=NAMESPACE, - filepath=cr_yaml_path, - ), - RayServiceDeleteCREvent(cr, [], 90, NAMESPACE, cr_yaml_path), - ] - - for cr_event in cr_events: - cr_event.trigger() - -if __name__ == "__main__": - sys.exit(pytest.main(["-v", "-s", __file__])) diff --git a/third_party/swagger-ui/swagger-initializer.js b/third_party/swagger-ui/swagger-initializer.js index 3eaadc5698c..4bb7c6e895c 100644 --- a/third_party/swagger-ui/swagger-initializer.js +++ b/third_party/swagger-ui/swagger-initializer.js @@ -4,11 +4,11 @@ window.onload = function() { // the following lines will be replaced by docker/configurator, when it runs in a docker-container window.ui = SwaggerUIBundle({ spec: location.host, - urls: [{"url":"http://"+location.host+"/swagger/serve.swagger.json","name":"RayServe Service"}, - {"url":"http://"+location.host+"/swagger/error.swagger.json","name":"Errors API"}, - {"url":"http://"+location.host+"/swagger/job.swagger.json","name":"RayJob Service"}, - {"url":"http://"+location.host+"/swagger/config.swagger.json","name":"ComputeTemplate Service"}, - {"url":"http://"+location.host+"/swagger/cluster.swagger.json","name":"Cluster Service"}], + urls: [{"url":window.location.protocol+"//"+location.host+"/swagger/serve.swagger.json","name":"RayServe Service"}, + {"url":window.location.protocol+"//"+location.host+"/swagger/error.swagger.json","name":"Errors API"}, + {"url":window.location.protocol+"//"+location.host+"/swagger/job.swagger.json","name":"RayJob Service"}, + {"url":window.location.protocol+"//"+location.host+"/swagger/config.swagger.json","name":"ComputeTemplate Service"}, + {"url":window.location.protocol+"//"+location.host+"/swagger/cluster.swagger.json","name":"Cluster Service"}], dom_id: '#swagger-ui', deepLinking: true, presets: [