Skip to content

Commit

Permalink
[Feature] Introduce TAS e2e tests with dedicated infra (#3489)
Browse files Browse the repository at this point in the history
* Introduce TAS test-infra

* Add first e2e tests

* Add resource limit to job wrapper

* Add TAS e2e target for test-infra

* Update after code review
  • Loading branch information
mszadkow authored Nov 14, 2024
1 parent b97cad0 commit 9624ed8
Show file tree
Hide file tree
Showing 7 changed files with 433 additions and 6 deletions.
18 changes: 15 additions & 3 deletions Makefile-test.mk
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,13 @@ test-integration: gomod-download envtest ginkgo dep-crds kueuectl ginkgo-top ##

CREATE_KIND_CLUSTER ?= true
.PHONY: test-e2e
test-e2e: kustomize ginkgo yq gomod-download dep-crds kueuectl ginkgo-top run-test-e2e-$(E2E_KIND_VERSION:kindest/node:v%=%)
test-e2e: kustomize ginkgo yq gomod-download dep-crds kueuectl ginkgo-top run-test-e2e-$(E2E_KIND_VERSION:kindest/node:v%=%) run-test-tas-e2e-$(E2E_KIND_VERSION:kindest/node:v%=%)

.PHONY: test-multikueue-e2e
test-multikueue-e2e: kustomize ginkgo yq gomod-download dep-crds ginkgo-top run-test-multikueue-e2e-$(E2E_KIND_VERSION:kindest/node:v%=%)

# test-tas-e2e prepares the e2e tooling prerequisites and then runs the TAS
# e2e rule for the Kubernetes version obtained by stripping the
# "kindest/node:v" prefix from E2E_KIND_VERSION.
.PHONY: test-tas-e2e
test-tas-e2e: kustomize ginkgo yq gomod-download dep-crds kueuectl ginkgo-top run-test-tas-e2e-$(E2E_KIND_VERSION:kindest/node:v%=%)

E2E_TARGETS := $(addprefix run-test-e2e-,${E2E_K8S_VERSIONS})
MULTIKUEUE-E2E_TARGETS := $(addprefix run-test-multikueue-e2e-,${E2E_K8S_VERSIONS})
Expand All @@ -104,15 +106,25 @@ FORCE:
run-test-e2e-%: K8S_VERSION = $(@:run-test-e2e-%=%)
run-test-e2e-%: FORCE
@echo Running e2e for k8s ${K8S_VERSION}
E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) KUBEFLOW_VERSION=$(KUBEFLOW_VERSION) KUBEFLOW_MPI_VERSION=$(KUBEFLOW_MPI_VERSION) ./hack/e2e-test.sh
E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" \
JOBSET_VERSION=$(JOBSET_VERSION) KUBEFLOW_VERSION=$(KUBEFLOW_VERSION) KUBEFLOW_MPI_VERSION=$(KUBEFLOW_MPI_VERSION) KIND_CLUSTER_FILE="kind-cluster.yaml" E2E_TARGET_FOLDER="singlecluster" ./hack/e2e-test.sh
$(PROJECT_DIR)/bin/ginkgo-top -i $(ARTIFACTS)/$@/e2e.json > $(ARTIFACTS)/$@/e2e-top.yaml

run-test-multikueue-e2e-%: K8S_VERSION = $(@:run-test-multikueue-e2e-%=%)
run-test-multikueue-e2e-%: FORCE
@echo Running multikueue e2e for k8s ${K8S_VERSION}
E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) KUBEFLOW_VERSION=$(KUBEFLOW_VERSION) KUBEFLOW_MPI_VERSION=$(KUBEFLOW_MPI_VERSION) ./hack/multikueue-e2e-test.sh
E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" \
JOBSET_VERSION=$(JOBSET_VERSION) KUBEFLOW_VERSION=$(KUBEFLOW_VERSION) KUBEFLOW_MPI_VERSION=$(KUBEFLOW_MPI_VERSION) ./hack/multikueue-e2e-test.sh
$(PROJECT_DIR)/bin/ginkgo-top -i $(ARTIFACTS)/$@/e2e.json > $(ARTIFACTS)/$@/e2e-top.yaml

# run-test-tas-e2e-<version> runs the TAS e2e suite against one Kubernetes version.
# K8S_VERSION is taken from the part of the target name after "run-test-tas-e2e-".
# The recipe invokes ./hack/e2e-test.sh with KIND_CLUSTER_FILE set to
# tas-kind-cluster.yaml and E2E_TARGET_FOLDER set to tas, then passes the
# resulting e2e.json to ginkgo-top and stores its output as e2e-top.yaml in
# the per-target artifacts directory.
run-test-tas-e2e-%: K8S_VERSION = $(@:run-test-tas-e2e-%=%)
run-test-tas-e2e-%: FORCE
@echo Running tas e2e for k8s ${K8S_VERSION}
E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" \
JOBSET_VERSION=$(JOBSET_VERSION) KUBEFLOW_VERSION=$(KUBEFLOW_VERSION) KUBEFLOW_MPI_VERSION=$(KUBEFLOW_MPI_VERSION) KIND_CLUSTER_FILE="tas-kind-cluster.yaml" E2E_TARGET_FOLDER="tas" ./hack/e2e-test.sh
$(PROJECT_DIR)/bin/ginkgo-top -i $(ARTIFACTS)/$@/e2e.json > $(ARTIFACTS)/$@/e2e-top.yaml


SCALABILITY_RUNNER := $(PROJECT_DIR)/bin/performance-scheduler-runner
.PHONY: performance-scheduler-runner
performance-scheduler-runner:
Expand Down
4 changes: 2 additions & 2 deletions hack/e2e-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ function startup {
if [ ! -d "$ARTIFACTS" ]; then
mkdir -p "$ARTIFACTS"
fi
cluster_create "$KIND_CLUSTER_NAME" "$SOURCE_DIR/kind-cluster.yaml"
cluster_create "$KIND_CLUSTER_NAME" "$SOURCE_DIR/$KIND_CLUSTER_FILE"
fi
}

Expand All @@ -66,4 +66,4 @@ startup
kind_load
kueue_deploy
# shellcheck disable=SC2086
$GINKGO $GINKGO_ARGS --junit-report=junit.xml --json-report=e2e.json --output-dir="$ARTIFACTS" -v ./test/e2e/singlecluster/...
$GINKGO $GINKGO_ARGS --junit-report=junit.xml --json-report=e2e.json --output-dir="$ARTIFACTS" -v ./test/e2e/$E2E_TARGET_FOLDER/...
59 changes: 59 additions & 0 deletions hack/tas-kind-cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
---
# kind cluster definition with one control-plane node and eight worker nodes.
# Every worker is labeled cloud.provider.com/node-group=tas-group and carries
# topology labels forming two blocks (b1, b2), each with two racks (r1, r2),
# with two workers per (block, rack) pair.
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
kubeadmConfigPatches:
- |
kind: ClusterConfiguration
apiVersion: kubeadm.k8s.io/v1beta3
scheduler:
extraArgs:
v: "2"
controllerManager:
extraArgs:
v: "2"
apiServer:
extraArgs:
enable-aggregator-routing: "true"
v: "2"
- role: worker
labels:
cloud.provider.com/node-group: tas-group
cloud.provider.com/topology-block: b1
cloud.provider.com/topology-rack: r1
- role: worker
labels:
cloud.provider.com/node-group: tas-group
cloud.provider.com/topology-block: b1
cloud.provider.com/topology-rack: r1
- role: worker
labels:
cloud.provider.com/node-group: tas-group
cloud.provider.com/topology-block: b1
cloud.provider.com/topology-rack: r2
- role: worker
labels:
cloud.provider.com/node-group: tas-group
cloud.provider.com/topology-block: b1
cloud.provider.com/topology-rack: r2
- role: worker
labels:
cloud.provider.com/node-group: tas-group
cloud.provider.com/topology-block: b2
cloud.provider.com/topology-rack: r1
- role: worker
labels:
cloud.provider.com/node-group: tas-group
cloud.provider.com/topology-block: b2
cloud.provider.com/topology-rack: r1
- role: worker
labels:
cloud.provider.com/node-group: tas-group
cloud.provider.com/topology-block: b2
cloud.provider.com/topology-rack: r2
- role: worker
labels:
cloud.provider.com/node-group: tas-group
cloud.provider.com/topology-block: b2
cloud.provider.com/topology-rack: r2
8 changes: 7 additions & 1 deletion pkg/util/testingjobs/job/wrappers.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func MakeJob(name, ns string) *JobWrapper {
{
Name: "c",
Image: "pause",
Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}},
Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}, Limits: corev1.ResourceList{}},
},
},
NodeSelector: map[string]string{},
Expand Down Expand Up @@ -180,6 +180,12 @@ func (j *JobWrapper) Request(r corev1.ResourceName, v string) *JobWrapper {
return j
}

// Limit sets the limit for resource r on the default (first) container to the
// quantity parsed from v and returns the wrapper so calls can be chained.
//
// The Limits map is allocated on demand, so the method does not panic when the
// container's Limits map has not been initialized. The quantity string v must
// be valid: resource.MustParse panics on input it cannot parse.
func (j *JobWrapper) Limit(r corev1.ResourceName, v string) *JobWrapper {
	resources := &j.Spec.Template.Spec.Containers[0].Resources
	if resources.Limits == nil {
		// Assigning into a nil map panics; create it lazily.
		resources.Limits = corev1.ResourceList{}
	}
	resources.Limits[r] = resource.MustParse(v)
	return j
}

func (j *JobWrapper) Image(image string, args []string) *JobWrapper {
j.Spec.Template.Spec.Containers[0].Image = image
j.Spec.Template.Spec.Containers[0].Args = args
Expand Down
78 changes: 78 additions & 0 deletions test/e2e/tas/suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tase2e

import (
"context"
"fmt"
"os"
"testing"
"time"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

clientutil "sigs.k8s.io/kueue/pkg/util/client"
"sigs.k8s.io/kueue/test/util"
)

// Suite-wide state shared by the specs in this package; both values are
// assigned in BeforeSuite.
var (
	// k8sClient is the cluster client obtained from
	// util.CreateClientUsingCluster("").
	k8sClient client.WithWatch
	// ctx is the context passed to client calls; it is set to
	// context.Background().
	ctx context.Context
)

// TestAPIs is the go test entry point for the TAS end-to-end suite. It hooks
// Gomega failures into Ginkgo and runs the specs under a suite name that is
// suffixed with the value of E2E_KIND_VERSION when that variable is set.
func TestAPIs(t *testing.T) {
	gomega.RegisterFailHandler(ginkgo.Fail)

	description := "End To End TAS Suite"
	kindVersion, isSet := os.LookupEnv("E2E_KIND_VERSION")
	if isSet {
		description = fmt.Sprintf("%s: %s", description, kindVersion)
	}

	ginkgo.RunSpecs(t, description)
}

// BeforeSuite initializes the shared client and context, waits until the Kueue
// and JobSet operators are available, and then patches the status of every
// node carrying the tasNodeGroupLabel label so that it reports one unit of
// extraResource in both its capacity and its allocatable resources.
var _ = ginkgo.BeforeSuite(func() {
	ctrl.SetLogger(util.NewTestingLogger(ginkgo.GinkgoWriter, -3))

	k8sClient, _ = util.CreateClientUsingCluster("")
	ctx = context.Background()

	waitForAvailableStart := time.Now()
	util.WaitForKueueAvailability(ctx, k8sClient)
	util.WaitForJobSetAvailability(ctx, k8sClient)
	ginkgo.GinkgoLogr.Info("Kueue and JobSet operators are available in the cluster", "waitingTime", time.Since(waitForAvailableStart))

	// Select the nodes by the presence of the node-group label only; its
	// value is not constrained.
	nodes := &corev1.NodeList{}
	err := k8sClient.List(ctx, nodes, client.HasLabels{tasNodeGroupLabel})
	gomega.Expect(err).NotTo(gomega.HaveOccurred(), "failed to list nodes for TAS")
	// Fail early instead of silently patching nothing when the cluster was
	// not created with the labeled nodes.
	gomega.Expect(nodes.Items).NotTo(gomega.BeEmpty(), "no nodes carry the TAS node-group label")

	// Iterate by index so each patch works on the list element itself rather
	// than on the address of a range variable.
	for i := range nodes.Items {
		node := &nodes.Items[i]
		err := clientutil.PatchStatus(ctx, k8sClient, node, func() (bool, error) {
			node.Status.Capacity[extraResource] = resource.MustParse("1")
			node.Status.Allocatable[extraResource] = resource.MustParse("1")
			return true, nil
		})
		gomega.Expect(err).NotTo(gomega.HaveOccurred())
	}
})
Loading

0 comments on commit 9624ed8

Please sign in to comment.