From c86420992389648a1fd189ddb95cffc2660d02f0 Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Wed, 31 Jan 2024 14:05:19 +0100 Subject: [PATCH 01/10] add support matrix --- docs/supported_distros.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 docs/supported_distros.md diff --git a/docs/supported_distros.md b/docs/supported_distros.md new file mode 100644 index 0000000..b9ba69d --- /dev/null +++ b/docs/supported_distros.md @@ -0,0 +1,15 @@ +# Supported-Matrix + +We support the same Kubernetes distributions as Kwasm. + +| Container runtimes | Kind | Azure Kubernetes | GCP Kubernetes | AWS Kubernetes | Digital Ocean Kubernetes | CIVO Kubernetes | Kubernetes in Docker | Minikube | Canonical MicroK8s | +|---------------------|------|------------------|----------------|----------------|--------------------------|-----------------|----------------------|----------|--------------------| +| WasmEdge | ✅ | ✅ | (✅) | (✅) | ✅ | ✅ | ✅ | ✅ | ✅ | +| Wasmtime | ✅ | ✅ | (✅) | (✅) | ✅ | ✅ | ✅ | ✅ | ✅ | +| Fermyon Spin | ✅ | ✅ | (✅) | (✅) | ✅ | ✅ | ✅ | ✅ | ✅ | +| Wasm Workers Server | ✅ | ✅ | (✅) | (✅) | ✅ | ✅ | ✅ | ✅ | ✅ | +| Lunatic | ✅ | ✅ | (✅) | (✅) | ✅ | ✅ | ✅ | ✅ | ✅ | +| Slight | ✅ | ✅ | (✅) | (✅) | ✅ | ✅ | ✅ | ✅ | ✅ | + +✅ = officially supported +(✅) = only with Ubuntu Nodes \ No newline at end of file From 1e2f888a3ad766987c582780775f7bb619010d53 Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Wed, 31 Jan 2024 14:19:46 +0100 Subject: [PATCH 02/10] reintroduce controller namespace --- Makefile | 2 +- cmd/main.go | 25 +++++++++++++------------ internal/controller/shim_controller.go | 19 +++++++++++++------ 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 1c29c1c..5377a78 100644 --- a/Makefile +++ b/Makefile @@ -98,7 +98,7 @@ build: manifests generate fmt vet golangci-build ## Build manager binary. .PHONY: run run: manifests generate fmt vet ## Run a controller from your host. 
- go run -ldflags "${LDFLAGS}" ./cmd/main.go + CONTROLLER_NAMESPACE="default" go run -ldflags "${LDFLAGS}" ./cmd/main.go # If you wish to build the manager image targeting other platforms you can use the --platform flag. # (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it. diff --git a/cmd/main.go b/cmd/main.go index 7e99f8d..d72fdc5 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -18,6 +18,7 @@ package main import ( "flag" + "fmt" "os" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) @@ -51,18 +52,18 @@ func init() { } // getWatchNamespace returns the Namespace the operator should be watching for changes -// func getWatchNamespace() string { -// // WatchNamespaceEnvVar is the constant for env variable WATCH_NAMESPACE -// // which specifies the Namespace to watch. -// // An empty value means the operator will fail to start. -// watchNamespaceEnvVar := "CONTROLLER_NAMESPACE" - -// ns, found := os.LookupEnv(watchNamespaceEnvVar) -// if !found { -// panic(fmt.Sprintf("env var '%s' must be set", watchNamespaceEnvVar)) -// } -// return ns -// } +func getWatchNamespace() string { + // WatchNamespaceEnvVar is the constant for env variable WATCH_NAMESPACE + // which specifies the Namespace to watch. + // An empty value means the operator will fail to start. 
+ watchNamespaceEnvVar := "CONTROLLER_NAMESPACE" + + ns, found := os.LookupEnv(watchNamespaceEnvVar) + if !found { + panic(fmt.Sprintf("env var '%s' must be set", watchNamespaceEnvVar)) + } + return ns +} func main() { var metricsAddr string diff --git a/internal/controller/shim_controller.go b/internal/controller/shim_controller.go index 0dcb679..236bcf7 100644 --- a/internal/controller/shim_controller.go +++ b/internal/controller/shim_controller.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "math" + "os" "github.com/rs/zerolog/log" "k8s.io/apimachinery/pkg/runtime" @@ -132,6 +133,7 @@ func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return ctrl.Result{}, nil } +// handleDeployJob deploys a Job to each node in a list. func (sr *ShimReconciler) handleDeployJob(ctx context.Context, shim *kwasmv1.Shim, nodes *corev1.NodeList, req ctrl.Request) (ctrl.Result, error) { switch shim.Spec.RolloutStrategy.Type { case "rolling": @@ -142,8 +144,9 @@ func (sr *ShimReconciler) handleDeployJob(ctx context.Context, shim *kwasmv1.Shi { log.Debug().Msgf("Recreate strategy selected") for i := range nodes.Items { - log.Info().Msgf("Deploying on node: %s", nodes.Items[i].Name) - job, err := sr.createJobManifest(shim, &nodes.Items[i], req) + node := nodes.Items[i] + log.Info().Msgf("Deploying on node: %s", node.Name) + job, err := sr.createJobManifest(shim, &node, req) if err != nil { return ctrl.Result{}, err } @@ -171,6 +174,7 @@ func (sr *ShimReconciler) handleDeployJob(ctx context.Context, shim *kwasmv1.Shi return ctrl.Result{}, nil } +// createJobManifest creates a Job manifest for a Shim. func (sr *ShimReconciler) createJobManifest(shim *kwasmv1.Shim, node *corev1.Node, req ctrl.Request) (*batchv1.Job, error) { priv := true name := node.Name + "." 
+ shim.Name @@ -183,7 +187,7 @@ func (sr *ShimReconciler) createJobManifest(shim *kwasmv1.Shim, node *corev1.Nod }, ObjectMeta: metav1.ObjectMeta{ Name: name[:nameMax], - Namespace: "default", + Namespace: os.Getenv("CONTROLLER_NAMESPACE"), Labels: map[string]string{name[:nameMax]: "true"}, }, Spec: batchv1.JobSpec{ @@ -246,6 +250,7 @@ func (sr *ShimReconciler) createJobManifest(shim *kwasmv1.Shim, node *corev1.Nod return job, nil } +// handleDeployRuntmeClass deploys a RuntimeClass for a Shim. func (sr *ShimReconciler) handleDeployRuntmeClass(ctx context.Context, shim *kwasmv1.Shim) (ctrl.Result, error) { log.Info().Msgf("Deploying RuntimeClass: %s", shim.Spec.RuntimeClass.Name) rc, err := sr.createRuntimeClassManifest(shim) @@ -269,6 +274,7 @@ func (sr *ShimReconciler) handleDeployRuntmeClass(ctx context.Context, shim *kwa return ctrl.Result{}, nil } +// createRuntimeClassManifest creates a RuntimeClass manifest for a Shim. func (sr *ShimReconciler) createRuntimeClassManifest(shim *kwasmv1.Shim) (*nodev1.RuntimeClass, error) { name := shim.Name nameMax := int(math.Min(float64(len(name)), 63)) @@ -285,7 +291,7 @@ func (sr *ShimReconciler) createRuntimeClassManifest(shim *kwasmv1.Shim) (*nodev }, ObjectMeta: metav1.ObjectMeta{ Name: name[:nameMax], - Namespace: "default", + Namespace: os.Getenv("CONTROLLER_NAMESPACE"), Labels: map[string]string{name[:nameMax]: "true"}, }, Handler: shim.Spec.RuntimeClass.Handler, @@ -328,9 +334,10 @@ func (sr *ShimReconciler) findJobsForShim(ctx context.Context, shim *kwasmv1.Shi jobs := &batchv1.JobList{} - err := sr.List(ctx, jobs, client.InNamespace("default"), client.MatchingLabels(map[string]string{name[:nameMax]: "true"})) + err := sr.List(ctx, jobs, client.InNamespace(os.Getenv("CONTROLLER_NAMESPACE")), client.MatchingLabels(map[string]string{name[:nameMax]: "true"})) + log.Debug().Msgf("Found %d jobs", len(jobs.Items)) - log.Debug().Msgf("Found: %v", len(jobs.Items)) + if err != nil { return nil, err } From 
0549ff191554205bbddb8b3ade417bc5eb5556c2 Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Wed, 31 Jan 2024 14:52:09 +0100 Subject: [PATCH 03/10] add convenience tasks to makefile --- Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Makefile b/Makefile index 5377a78..6e7bc5a 100644 --- a/Makefile +++ b/Makefile @@ -191,3 +191,14 @@ $(CONTROLLER_GEN): $(LOCALBIN) envtest: $(ENVTEST) ## Download envtest-setup locally if necessary. $(ENVTEST): $(LOCALBIN) test -s $(LOCALBIN)/setup-envtest || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest + +.PHONY: create-test-cluster +create-kind-cluster: + kind create cluster --config ./hack/kind.yaml --name kwasm + +.PHONY: kind-delete +kind-delete: + kind delete cluster --name kwasm + +.PHONY: kind +kind: create-kind-cluster install From 22c2239051a6177681ea6b7c825c3fa29683066e Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Wed, 31 Jan 2024 14:52:29 +0100 Subject: [PATCH 04/10] add additional printcolumn to shim overview --- api/v1alpha1/shim_types.go | 1 + config/crd/bases/runtime.kwasm.sh_shims.yaml | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/api/v1alpha1/shim_types.go b/api/v1alpha1/shim_types.go index 4c55467..64ed729 100644 --- a/api/v1alpha1/shim_types.go +++ b/api/v1alpha1/shim_types.go @@ -59,6 +59,7 @@ type ShimStatus struct { // +kubebuilder:object:root=true // +kubebuilder:resource:path=shims,scope=Cluster +// +kubebuilder:printcolumn:JSONPath=".spec.runtimeClass.name",name=RuntimeClass,type=string // Shim is the Schema for the shims API type Shim struct { metav1.TypeMeta `json:",inline"` diff --git a/config/crd/bases/runtime.kwasm.sh_shims.yaml b/config/crd/bases/runtime.kwasm.sh_shims.yaml index d10e2d6..c3db86f 100644 --- a/config/crd/bases/runtime.kwasm.sh_shims.yaml +++ b/config/crd/bases/runtime.kwasm.sh_shims.yaml @@ -14,7 +14,11 @@ spec: singular: shim scope: Cluster versions: - - name: v1alpha1 + - 
additionalPrinterColumns: + - jsonPath: .spec.runtimeClass.name + name: RuntimeClass + type: string + name: v1alpha1 schema: openAPIV3Schema: description: Shim is the Schema for the shims API @@ -157,3 +161,4 @@ spec: type: object served: true storage: true + subresources: {} From 2fd41682e3eb8aab9179121acfe8238f7c5000ee Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Fri, 2 Feb 2024 17:17:08 +0100 Subject: [PATCH 05/10] add update of node labels --- PROJECT | 5 + cmd/main.go | 9 +- config/rbac/role.yaml | 26 +++++ go.mod | 6 +- go.sum | 7 ++ internal/controller/job_controller.go | 149 +++++++++++++++++++++++++ internal/controller/shim_controller.go | 77 ++++++++++--- 7 files changed, 257 insertions(+), 22 deletions(-) create mode 100644 internal/controller/job_controller.go diff --git a/PROJECT b/PROJECT index 2e3b552..425f44d 100644 --- a/PROJECT +++ b/PROJECT @@ -22,4 +22,9 @@ resources: group: runtime kind: Node version: v1alpha1 +- controller: true + domain: kwasm.sh + group: runtime + kind: Job + version: v1alpha1 version: "3" diff --git a/cmd/main.go b/cmd/main.go index d72fdc5..b6cea29 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -56,7 +56,7 @@ func getWatchNamespace() string { // WatchNamespaceEnvVar is the constant for env variable WATCH_NAMESPACE // which specifies the Namespace to watch. // An empty value means the operator will fail to start. 
- watchNamespaceEnvVar := "CONTROLLER_NAMESPACE" + watchNamespaceEnvVar := " " ns, found := os.LookupEnv(watchNamespaceEnvVar) if !found { @@ -119,6 +119,13 @@ func main() { // setupLog.Error(err, "unable to create controller", "controller", "Node") // os.Exit(1) // } + if err = (&controller.JobReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Job") + os.Exit(1) + } //+kubebuilder:scaffold:builder if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 205e25e..7fcb7fe 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -4,6 +4,32 @@ kind: ClusterRole metadata: name: manager-role rules: +- apiGroups: + - runtime.kwasm.sh + resources: + - jobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - runtime.kwasm.sh + resources: + - jobs/finalizers + verbs: + - update +- apiGroups: + - runtime.kwasm.sh + resources: + - jobs/status + verbs: + - get + - patch + - update - apiGroups: - runtime.kwasm.sh resources: diff --git a/go.mod b/go.mod index 21e0415..fdfb123 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,9 @@ go 1.21 require ( github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.10 + github.com/prometheus/common v0.44.0 + github.com/rs/zerolog v1.31.0 + k8s.io/api v0.28.3 k8s.io/apimachinery v0.28.3 k8s.io/client-go v0.28.3 sigs.k8s.io/controller-runtime v0.16.3 @@ -44,9 +47,7 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_golang v1.16.0 // indirect github.com/prometheus/client_model v0.4.0 // indirect - github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.10.1 // indirect - github.com/rs/zerolog v1.31.0 // indirect github.com/spf13/pflag v1.0.5 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.25.0 // indirect @@ -64,7 
+65,6 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.28.3 // indirect k8s.io/apiextensions-apiserver v0.28.3 // indirect k8s.io/component-base v0.28.3 // indirect k8s.io/klog/v2 v2.100.1 // indirect diff --git a/go.sum b/go.sum index 33fc1f6..dfb3968 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,6 @@ github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= +github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= @@ -15,6 +16,7 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= +github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.6.0 h1:b91NhWfaz02IuVxO9faSllyAtNXHMPkC5J8sJCLunww= github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= @@ -67,6 +69,7 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 
h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -105,6 +108,7 @@ github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= @@ -120,12 +124,14 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/goleak v1.2.1 
h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= +go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= @@ -142,6 +148,7 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= +golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= diff --git a/internal/controller/job_controller.go b/internal/controller/job_controller.go new file mode 100644 index 0000000..4b24036 --- /dev/null +++ b/internal/controller/job_controller.go @@ -0,0 +1,149 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "context" + "fmt" + + "github.com/rs/zerolog/log" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// JobReconciler reconciles a Job object +type JobReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=runtime.kwasm.sh,resources=jobs,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=runtime.kwasm.sh,resources=jobs/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=runtime.kwasm.sh,resources=jobs/finalizers,verbs=update + +// SetupWithManager sets up the controller with the Manager. +func (jr *JobReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + // Uncomment the following line adding a pointer to an instance of the controlled resource as an argument + For(&batchv1.Job{}). + Complete(jr) +} + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the Job object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. 
+// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile +func (jr *JobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.With().Str("job", req.Name).Logger() + log.Debug().Msg("Job Reconciliation started!") + + job := &batchv1.Job{} + + if err := jr.Get(ctx, req.NamespacedName, job); err != nil { + if apierrors.IsNotFound(err) { + // we'll ignore not-found errors, since they can't be fixed by an immediate + // requeue (we'll need to wait for a new notification), and we can get them + // on deleted requests. + return ctrl.Result{}, nil + } + log.Error().Msgf("Unable to get Job: %s", err) + return ctrl.Result{}, fmt.Errorf("failed to get Job: %w", err) + } + + if _, exists := job.Labels["kwasm.sh/shimName"]; !exists { + return ctrl.Result{}, nil + } + + shimName := job.Labels["kwasm.sh/shimName"] + + node, err := jr.getNode(ctx, job.Spec.Template.Spec.NodeName, req) + if err != nil { + return ctrl.Result{}, nil + } + + _, finishedType := jr.isJobFinished(job) + switch finishedType { + case "": // ongoing + log.Info().Msgf("Job %s is still Ongoing", job.Name) + // if err := jr.updateNodeLabels(ctx, node, shimName, "pending", req); err != nil { + // log.Error().Msgf("Unable to update node label %s: %s", shimName, err) + // } + return ctrl.Result{}, nil + case batchv1.JobFailed: + log.Info().Msgf("Job %s is still failing...", job.Name) + if err := jr.updateNodeLabels(ctx, node, shimName, "failed", req); err != nil { + log.Error().Msgf("Unable to update node label %s: %s", shimName, err) + } + return ctrl.Result{}, nil + case batchv1.JobFailureTarget: + log.Info().Msgf("Job %s is about to fail", job.Name) + if err := jr.updateNodeLabels(ctx, node, shimName, "failed", req); err != nil { + log.Error().Msgf("Unable to update node label %s: %s", shimName, err) + } + return ctrl.Result{}, nil + case batchv1.JobComplete: + log.Info().Msgf("Job %s 
is Completed. Happy WASMing", job.Name) + if err := jr.updateNodeLabels(ctx, node, shimName, "provisioned", req); err != nil { + log.Error().Msgf("Unable to update node label %s: %s", shimName, err) + } + return ctrl.Result{}, nil + case batchv1.JobSuspended: + log.Info().Msgf("Job %s is suspended", job.Name) + return ctrl.Result{}, nil + } + + return ctrl.Result{}, nil +} + +func (jr *JobReconciler) updateNodeLabels(ctx context.Context, node *corev1.Node, shimName string, status string, req ctrl.Request) error { + node.Labels[shimName] = status + + if err := jr.Update(ctx, node); err != nil { + return err + } + + return nil +} + +func (jr *JobReconciler) getNode(ctx context.Context, nodeName string, req ctrl.Request) (*corev1.Node, error) { + node := corev1.Node{} + if err := jr.Client.Get(ctx, types.NamespacedName{Name: nodeName}, &node); err != nil { + log.Err(err).Msg("Unable to fetch node") + return &corev1.Node{}, client.IgnoreNotFound(err) + } + return &node, nil +} + +func (jr *JobReconciler) isJobFinished(job *batchv1.Job) (bool, batchv1.JobConditionType) { + for _, c := range job.Status.Conditions { + if (c.Type == batchv1.JobComplete || c.Type == batchv1.JobFailed) && c.Status == corev1.ConditionTrue { + return true, c.Type + } + } + + return false, "" +} diff --git a/internal/controller/shim_controller.go b/internal/controller/shim_controller.go index 236bcf7..b079213 100644 --- a/internal/controller/shim_controller.go +++ b/internal/controller/shim_controller.go @@ -46,8 +46,7 @@ const ( // ShimReconciler reconciles a Shim object type ShimReconciler struct { client.Client - Scheme *runtime.Scheme - AutoProvision bool + Scheme *runtime.Scheme } //+kubebuilder:rbac:groups=runtime.kwasm.sh,resources=shims,verbs=get;list;watch;create;update;patch;delete @@ -145,23 +144,19 @@ func (sr *ShimReconciler) handleDeployJob(ctx context.Context, shim *kwasmv1.Shi log.Debug().Msgf("Recreate strategy selected") for i := range nodes.Items { node := nodes.Items[i] - 
log.Info().Msgf("Deploying on node: %s", node.Name) - job, err := sr.createJobManifest(shim, &node, req) - if err != nil { - return ctrl.Result{}, err - } - // We want to use server-side apply https://kubernetes.io/docs/reference/using-api/server-side-apply - patchMethod := client.Apply - patchOptions := &client.PatchOptions{ - Force: ptr(true), // Force b/c any fields we are setting need to be owned by the spin-operator - FieldManager: "shim-operator", - } + shimProvisioned := node.Labels[shim.Name] == "provisioned" + shimPending := node.Labels[shim.Name] == "pending" + + if !shimProvisioned && !shimPending { - // Note that we reconcile even if the deployment is in a good state. We rely on controller-runtime to rate limit us. - if err := sr.Client.Patch(ctx, job, patchMethod, patchOptions); err != nil { - log.Error().Msgf("Unable to reconcile Job %s", err) - return ctrl.Result{}, err + err := sr.deployJobOnNode(ctx, shim, node, req) + if err != nil { + return ctrl.Result{}, err + } + + } else { + log.Info().Msgf("Shim %s already provisioned on Node %s", shim.Name, node.Name) } } } @@ -174,6 +169,48 @@ func (sr *ShimReconciler) handleDeployJob(ctx context.Context, shim *kwasmv1.Shi return ctrl.Result{}, nil } +// deployJobOnNode deploys a Job to a Node. 
+func (sr *ShimReconciler) deployJobOnNode(ctx context.Context, shim *kwasmv1.Shim, node corev1.Node, req ctrl.Request) error { + log.Info().Msgf("Deploying Shim %s on node: %s", shim.Name, node.Name) + + if err := sr.updateNodeLabels(ctx, &node, shim, "pending", req); err != nil { + log.Error().Msgf("Unable to update node label %s: %s", shim.Name, err) + } + + job, err := sr.createJobManifest(shim, &node, req) + if err != nil { + return err + } + + // We want to use server-side apply https://kubernetes.io/docs/reference/using-api/server-side-apply + patchMethod := client.Apply + patchOptions := &client.PatchOptions{ + Force: ptr(true), // Force b/c any fields we are setting need to be owned by the spin-operator + FieldManager: "shim-operator", + } + + // We rely on controller-runtime to rate limit us. + if err := sr.Client.Patch(ctx, job, patchMethod, patchOptions); err != nil { + log.Error().Msgf("Unable to reconcile Job %s", err) + if err := sr.updateNodeLabels(ctx, &node, shim, "failed", req); err != nil { + log.Error().Msgf("Unable to update node label %s: %s", shim.Name, err) + } + return err + } + + return nil +} + +func (sr *ShimReconciler) updateNodeLabels(ctx context.Context, node *corev1.Node, shim *kwasmv1.Shim, status string, req ctrl.Request) error { + node.Labels[shim.Name] = status + + if err := sr.Update(ctx, node); err != nil { + return err + } + + return nil +} + // createJobManifest creates a Job manifest for a Shim. 
func (sr *ShimReconciler) createJobManifest(shim *kwasmv1.Shim, node *corev1.Node, req ctrl.Request) (*batchv1.Job, error) { priv := true @@ -188,7 +225,11 @@ func (sr *ShimReconciler) createJobManifest(shim *kwasmv1.Shim, node *corev1.Nod ObjectMeta: metav1.ObjectMeta{ Name: name[:nameMax], Namespace: os.Getenv("CONTROLLER_NAMESPACE"), - Labels: map[string]string{name[:nameMax]: "true"}, + Labels: map[string]string{ + name[:nameMax]: "true", + "kwasm.sh/shimName": shim.Name, + "kwasm.sh/job": "true", + }, }, Spec: batchv1.JobSpec{ Template: corev1.PodTemplateSpec{ From 944472be7e2267ef6360c0b53d9787445785ac81 Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Fri, 2 Feb 2024 21:07:07 +0100 Subject: [PATCH 06/10] add some status info to shim resource --- api/v1alpha1/shim_types.go | 6 +- config/crd/bases/runtime.kwasm.sh_shims.yaml | 13 ++++ internal/controller/shim_controller.go | 78 ++++++++++++++++---- 3 files changed, 81 insertions(+), 16 deletions(-) diff --git a/api/v1alpha1/shim_types.go b/api/v1alpha1/shim_types.go index 64ed729..ee10684 100644 --- a/api/v1alpha1/shim_types.go +++ b/api/v1alpha1/shim_types.go @@ -54,12 +54,16 @@ type RollingSpec struct { // ShimStatus defines the observed state of Shim // +operator-sdk:csv:customresourcedefinitions:type=status type ShimStatus struct { - Conditions []metav1.Condition `json:"conditions,omitempty"` + Conditions []metav1.Condition `json:"conditions,omitempty"` + NodeCount int `json:"nodes"` + NodeReadyCount int `json:"nodesReady"` } // +kubebuilder:object:root=true // +kubebuilder:resource:path=shims,scope=Cluster // +kubebuilder:printcolumn:JSONPath=".spec.runtimeClass.name",name=RuntimeClass,type=string +// +kubebuilder:printcolumn:JSONPath=".status.nodesReady",name=Ready,type=integer +// +kubebuilder:printcolumn:JSONPath=".status.nodes",name=Nodes,type=integer // Shim is the Schema for the shims API type Shim struct { metav1.TypeMeta `json:",inline"` diff --git 
a/config/crd/bases/runtime.kwasm.sh_shims.yaml b/config/crd/bases/runtime.kwasm.sh_shims.yaml index c3db86f..6629739 100644 --- a/config/crd/bases/runtime.kwasm.sh_shims.yaml +++ b/config/crd/bases/runtime.kwasm.sh_shims.yaml @@ -18,6 +18,12 @@ spec: - jsonPath: .spec.runtimeClass.name name: RuntimeClass type: string + - jsonPath: .status.nodesReady + name: Ready + type: integer + - jsonPath: .status.nodes + name: Nodes + type: integer name: v1alpha1 schema: openAPIV3Schema: @@ -157,6 +163,13 @@ spec: - type type: object type: array + nodes: + type: integer + nodesReady: + type: integer + required: + - nodes + - nodesReady type: object type: object served: true diff --git a/internal/controller/shim_controller.go b/internal/controller/shim_controller.go index b079213..64dd66b 100644 --- a/internal/controller/shim_controller.go +++ b/internal/controller/shim_controller.go @@ -72,7 +72,7 @@ func (sr *ShimReconciler) SetupWithManager(mgr ctrl.Manager) error { // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.14.1/pkg/reconcile func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := log.With().Str("shim", req.Name).Logger() - log.Debug().Msg("Reconciliation started!") + ctx = log.WithContext(ctx) // 1. Check if the shim resource exists var shimResource kwasmv1.Shim @@ -81,6 +81,29 @@ func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return ctrl.Result{}, client.IgnoreNotFound(err) } + // 2. 
Get list of nodes where this shim is supposed to be deployed on + nodes := &corev1.NodeList{} + if shimResource.Spec.NodeSelector != nil { + // 3.1 that match the nodeSelector + err := sr.List(ctx, nodes, client.InNamespace(req.Namespace), client.MatchingLabels(shimResource.Spec.NodeSelector)) + if err != nil { + return ctrl.Result{}, err + } + } else { + // 3.2 or no selector at all (all nodes) + err := sr.List(ctx, nodes, client.InNamespace(req.Namespace)) + if err != nil { + return ctrl.Result{}, err + } + } + + // TODO: Update the number of nodes that are relevant to this shim + err := sr.updateStatus(ctx, &shimResource, nodes) + if err != nil { + log.Error().Msgf("Unable to update node count: %s", err) + return ctrl.Result{}, err + } + // Shim has been requested for deletion, delete the child resources if !shimResource.DeletionTimestamp.IsZero() { log.Debug().Msg("deletion started!") @@ -93,7 +116,7 @@ func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return ctrl.Result{}, client.IgnoreNotFound(err) } - // 2. Check if referenced runtimeClass exists in cluster + // 3. Check if referenced runtimeClass exists in cluster rcExists, err := sr.runtimeClassExists(ctx, &shimResource) if err != nil { log.Error().Msgf("RuntimeClass issue: %s", err) @@ -106,19 +129,6 @@ func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl } } - // 3. Get list of nodes - nodes := &corev1.NodeList{} - if shimResource.Spec.NodeSelector != nil { - // 3.1 that match the nodeSelector - err = sr.List(ctx, nodes, client.InNamespace(req.Namespace), client.MatchingLabels(shimResource.Spec.NodeSelector)) - } else { - // 3.2 or no selector at all (all nodes) - err = sr.List(ctx, nodes, client.InNamespace(req.Namespace)) - } - if err != nil { - return ctrl.Result{}, err - } - // 4. 
Deploy job to each node in list if len(nodes.Items) != 0 { _, err = sr.handleDeployJob(ctx, &shimResource, nodes, req) @@ -132,8 +142,37 @@ func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return ctrl.Result{}, nil } +func (sr *ShimReconciler) updateStatus(ctx context.Context, shim *kwasmv1.Shim, nodes *corev1.NodeList) error { + log := log.Ctx(ctx) + + shim.Status.NodeCount = len(nodes.Items) + shim.Status.NodeReadyCount = 0 + + if len(nodes.Items) >= 0 { + for _, node := range nodes.Items { + if node.Labels[shim.Name] == "provisioned" { + shim.Status.NodeReadyCount++ + } + } + } + + if err := sr.Update(ctx, shim); err != nil { + log.Error().Msgf("Unable to update status %s", err) + } + + // Re-fetch shim to avoid "object has been modified" errors + if err := sr.Client.Get(ctx, types.NamespacedName{Name: shim.Name, Namespace: shim.Namespace}, shim); err != nil { + log.Error().Msgf("Unable to re-fetch app: %s", err) + return err + } + + return nil +} + // handleDeployJob deploys a Job to each node in a list. func (sr *ShimReconciler) handleDeployJob(ctx context.Context, shim *kwasmv1.Shim, nodes *corev1.NodeList, req ctrl.Request) (ctrl.Result, error) { + log := log.Ctx(ctx) + switch shim.Spec.RolloutStrategy.Type { case "rolling": { @@ -171,6 +210,8 @@ func (sr *ShimReconciler) handleDeployJob(ctx context.Context, shim *kwasmv1.Shi // deployJobOnNode deploys a Job to a Node. func (sr *ShimReconciler) deployJobOnNode(ctx context.Context, shim *kwasmv1.Shim, node corev1.Node, req ctrl.Request) error { + log := log.Ctx(ctx) + log.Info().Msgf("Deploying Shim %s on node: %s", shim.Name, node.Name) if err := sr.updateNodeLabels(ctx, &node, shim, "pending", req); err != nil { @@ -293,6 +334,8 @@ func (sr *ShimReconciler) createJobManifest(shim *kwasmv1.Shim, node *corev1.Nod // handleDeployRuntmeClass deploys a RuntimeClass for a Shim. 
func (sr *ShimReconciler) handleDeployRuntmeClass(ctx context.Context, shim *kwasmv1.Shim) (ctrl.Result, error) { + log := log.Ctx(ctx) + log.Info().Msgf("Deploying RuntimeClass: %s", shim.Spec.RuntimeClass.Name) rc, err := sr.createRuntimeClassManifest(shim) if err != nil { @@ -370,6 +413,8 @@ func (sr *ShimReconciler) findShim(ctx context.Context, shim *kwasmv1.Shim) (*kw // findJobsForShim finds all jobs related to a ShimResource. func (sr *ShimReconciler) findJobsForShim(ctx context.Context, shim *kwasmv1.Shim) (*batchv1.JobList, error) { + log := log.Ctx(ctx) + name := shim.Name + "-provisioner" nameMax := int(math.Min(float64(len(name)), 63)) @@ -388,6 +433,7 @@ func (sr *ShimReconciler) findJobsForShim(ctx context.Context, shim *kwasmv1.Shi // deleteShim deletes a ShimResource. func (sr *ShimReconciler) deleteShim(ctx context.Context, shim *kwasmv1.Shim) error { + log := log.Ctx(ctx) log.Info().Msgf("Deleting Shim... %s", shim.Name) s, err := sr.findShim(ctx, shim) @@ -430,6 +476,8 @@ func (sr *ShimReconciler) deleteJobs(ctx context.Context, shim *kwasmv1.Shim) er // runtimeClassExists checks whether a RuntimeClass for a Shim exists. 
func (sr *ShimReconciler) runtimeClassExists(ctx context.Context, shim *kwasmv1.Shim) (bool, error) { + log := log.Ctx(ctx) + if shim.Spec.RuntimeClass.Name != "" { rc, err := sr.findRuntimeClass(ctx, shim) if err != nil { From 658eb46978711ae9e9edd663e0d0da583eac405e Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Fri, 2 Feb 2024 23:25:43 +0100 Subject: [PATCH 07/10] add node watcher to reconcile shims on change --- internal/controller/shim_controller.go | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/internal/controller/shim_controller.go b/internal/controller/shim_controller.go index 64dd66b..b4e4f15 100644 --- a/internal/controller/shim_controller.go +++ b/internal/controller/shim_controller.go @@ -31,8 +31,12 @@ import ( nodev1 "k8s.io/api/node/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" kwasmv1 "github.com/kwasm/kwasm-operator/api/v1alpha1" ) @@ -57,7 +61,19 @@ type ShimReconciler struct { func (sr *ShimReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&kwasmv1.Shim{}). + // As we create and own the created jobs + // Jobs are important for us to update the Shims installation status + // on respective nodes Owns(&batchv1.Job{}). + // As we don't own nodes, but need to react on node label changes, + // we need to watch node label changes. + // Whenever a label changes, we want to reconcile Shims, to make sure + // that the shim is deployed on the node if it should be. + Watches( + &corev1.Node{}, + handler.EnqueueRequestsFromMapFunc(sr.findShimsToReconcile), + builder.WithPredicates(predicate.LabelChangedPredicate{}), + ). 
Complete(sr) } @@ -142,6 +158,32 @@ func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return ctrl.Result{}, nil } +// findShimsToReconcile finds all Shims that need to be reconciled. +// This function is required e.g. to react on node label changes. +// When the label of a node changes, we want to reconcile shims to make sure +// that the shim is deployed on the node if it should be. +func (sr *ShimReconciler) findShimsToReconcile(ctx context.Context, node client.Object) []reconcile.Request { + shimList := &kwasmv1.ShimList{} + listOps := &client.ListOptions{ + Namespace: "", + } + err := sr.List(context.TODO(), shimList, listOps) + if err != nil { + return []reconcile.Request{} + } + + requests := make([]reconcile.Request, len(shimList.Items)) + for i, item := range shimList.Items { + requests[i] = reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: item.GetName(), + Namespace: item.GetNamespace(), + }, + } + } + return requests +} + func (sr *ShimReconciler) updateStatus(ctx context.Context, shim *kwasmv1.Shim, nodes *corev1.NodeList) error { log := log.Ctx(ctx) From 71204cf4a7d6e05452224b3675eeda4fdf68a5ef Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Fri, 2 Feb 2024 23:28:22 +0100 Subject: [PATCH 08/10] add finalizer to shim resource --- internal/controller/shim_controller.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/internal/controller/shim_controller.go b/internal/controller/shim_controller.go index b4e4f15..5b1b4a8 100644 --- a/internal/controller/shim_controller.go +++ b/internal/controller/shim_controller.go @@ -155,7 +155,8 @@ func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl log.Info().Msg("No nodes found") } - return ctrl.Result{}, nil + err = sr.ensureFinalizer(ctx, &shimResource) + return ctrl.Result{}, client.IgnoreNotFound(err) } // findShimsToReconcile finds all Shims that need to be reconciled. 
@@ -557,6 +558,17 @@ func (sr *ShimReconciler) removeFinalizer(ctx context.Context, shim *kwasmv1.Shi return nil } +// ensureFinalizer ensures the finalizer is present on a Shim resource. +func (sr *ShimReconciler) ensureFinalizer(ctx context.Context, shim *kwasmv1.Shim) error { + if !controllerutil.ContainsFinalizer(shim, KwasmOperatorFinalizer) { + controllerutil.AddFinalizer(shim, KwasmOperatorFinalizer) + if err := sr.Client.Update(ctx, shim); err != nil { + return err + } + } + return nil +} + func ptr[T any](v T) *T { return &v } From 1797e9751de9caa881933e172f3f4ba997af14f4 Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Sat, 3 Feb 2024 00:01:47 +0100 Subject: [PATCH 09/10] add shim deletion logic to remove labels from nodes --- internal/controller/shim_controller.go | 119 +++++++++---------------- 1 file changed, 41 insertions(+), 78 deletions(-) diff --git a/internal/controller/shim_controller.go b/internal/controller/shim_controller.go index 5b1b4a8..59b9906 100644 --- a/internal/controller/shim_controller.go +++ b/internal/controller/shim_controller.go @@ -98,23 +98,12 @@ func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl } // 2. 
Get list of nodes where this shim is supposed to be deployed on - nodes := &corev1.NodeList{} - if shimResource.Spec.NodeSelector != nil { - // 3.1 that match the nodeSelector - err := sr.List(ctx, nodes, client.InNamespace(req.Namespace), client.MatchingLabels(shimResource.Spec.NodeSelector)) - if err != nil { - return ctrl.Result{}, err - } - } else { - // 3.2 or no selector at all (all nodes) - err := sr.List(ctx, nodes, client.InNamespace(req.Namespace)) - if err != nil { - return ctrl.Result{}, err - } + nodes, err := sr.getNodeListFromShimsNodeSelctor(ctx, &shimResource) + if err != nil { + return ctrl.Result{}, err } - // TODO: Update the number of nodes that are relevant to this shim - err := sr.updateStatus(ctx, &shimResource, nodes) + err = sr.updateStatus(ctx, &shimResource, nodes) if err != nil { log.Error().Msgf("Unable to update node count: %s", err) return ctrl.Result{}, err @@ -122,12 +111,12 @@ func (sr *ShimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl // Shim has been requested for deletion, delete the child resources if !shimResource.DeletionTimestamp.IsZero() { - log.Debug().Msg("deletion started!") + log.Debug().Msgf("Deleting shim %s", shimResource.Name) err := sr.handleDeletion(ctx, &shimResource) if err != nil { return ctrl.Result{}, err } - log.Debug().Msg("removing finalizer!") + err = sr.removeFinalizer(ctx, &shimResource) return ctrl.Result{}, client.IgnoreNotFound(err) } @@ -199,6 +188,8 @@ func (sr *ShimReconciler) updateStatus(ctx context.Context, shim *kwasmv1.Shim, } } + // TODO: include proper status conditions to update + if err := sr.Update(ctx, shim); err != nil { log.Error().Msgf("Unable to update status %s", err) } @@ -436,7 +427,14 @@ func (sr *ShimReconciler) createRuntimeClassManifest(shim *kwasmv1.Shim) (*nodev // handleDeletion deletes all possible child resources of a Shim. It will ignore NotFound errors. 
func (sr *ShimReconciler) handleDeletion(ctx context.Context, shim *kwasmv1.Shim) error { - err := sr.deleteShim(ctx, shim) + // TODO: deploy uninstall job here + // err := sr.handleUninstall(ctx, shim) + // if client.IgnoreNotFound(err) != nil { + // return err + // } + + // remove shim labels from node + err := sr.removeShimLabelsFromNodes(ctx, shim) if client.IgnoreNotFound(err) != nil { return err } @@ -444,77 +442,42 @@ func (sr *ShimReconciler) handleDeletion(ctx context.Context, shim *kwasmv1.Shim return nil } -// findShim finds a ShimResource. -func (sr *ShimReconciler) findShim(ctx context.Context, shim *kwasmv1.Shim) (*kwasmv1.Shim, error) { - var s kwasmv1.Shim - err := sr.Client.Get(ctx, types.NamespacedName{Name: shim.Name, Namespace: shim.Namespace}, &s) - if err != nil { - return nil, err - } - return &s, nil -} - -// findJobsForShim finds all jobs related to a ShimResource. -func (sr *ShimReconciler) findJobsForShim(ctx context.Context, shim *kwasmv1.Shim) (*batchv1.JobList, error) { +func (sr *ShimReconciler) removeShimLabelsFromNodes(ctx context.Context, shim *kwasmv1.Shim) error { log := log.Ctx(ctx) - name := shim.Name + "-provisioner" - nameMax := int(math.Min(float64(len(name)), 63)) - - jobs := &batchv1.JobList{} - - err := sr.List(ctx, jobs, client.InNamespace(os.Getenv("CONTROLLER_NAMESPACE")), client.MatchingLabels(map[string]string{name[:nameMax]: "true"})) - - log.Debug().Msgf("Found %d jobs", len(jobs.Items)) - - if err != nil { - return nil, err - } - - return jobs, nil -} - -// deleteShim deletes a ShimResource. -func (sr *ShimReconciler) deleteShim(ctx context.Context, shim *kwasmv1.Shim) error { - log := log.Ctx(ctx) - log.Info().Msgf("Deleting Shim... 
%s", shim.Name) - - s, err := sr.findShim(ctx, shim) - if err != nil { - return err - } - - err = sr.deleteJobs(ctx, s) + nodes, err := sr.getNodeListFromShimsNodeSelctor(ctx, shim) if err != nil { return err } - // TODO: if Shim resource is deleted, it needs to be removed from nodes as well - err = sr.Client.Delete(ctx, s) - if err != nil { - return err + for _, node := range nodes.Items { + if _, ok := node.Labels[shim.Name]; ok { + log.Debug().Msgf("Removing label %s from node %s", shim.Name, node.Name) + delete(node.Labels, shim.Name) + if err := sr.Update(ctx, &node); err != nil { + log.Error().Msgf("Unable to remove label %s from node %s: %s", shim.Name, node.Name, err) + } + } } - log.Info().Msgf("Successfully deleted Shim... %s", shim.Name) - return nil } -// deleteJobs deletes all Jobs associated with a ShimResource. -func (sr *ShimReconciler) deleteJobs(ctx context.Context, shim *kwasmv1.Shim) error { - jobsList, err := sr.findJobsForShim(ctx, shim) - if err != nil { - return err - } - - for _, job := range jobsList.Items { - err = sr.Client.Delete(ctx, &job) +func (sr *ShimReconciler) getNodeListFromShimsNodeSelctor(ctx context.Context, shim *kwasmv1.Shim) (*corev1.NodeList, error) { + nodes := &corev1.NodeList{} + if shim.Spec.NodeSelector != nil { + err := sr.List(ctx, nodes, client.InNamespace(shim.Namespace), client.MatchingLabels(shim.Spec.NodeSelector)) if err != nil { - return err + return &corev1.NodeList{}, err + } + } else { + err := sr.List(ctx, nodes, client.InNamespace(shim.Namespace)) + if err != nil { + return &corev1.NodeList{}, err } } - return nil + return nodes, nil } // runtimeClassExists checks whether a RuntimeClass for a Shim exists. @@ -522,7 +485,7 @@ func (sr *ShimReconciler) runtimeClassExists(ctx context.Context, shim *kwasmv1. 
log := log.Ctx(ctx) if shim.Spec.RuntimeClass.Name != "" { - rc, err := sr.findRuntimeClass(ctx, shim) + rc, err := sr.getRuntimeClass(ctx, shim) if err != nil { log.Debug().Msgf("No RuntimeClass '%s' found", shim.Spec.RuntimeClass.Name) @@ -532,13 +495,13 @@ func (sr *ShimReconciler) runtimeClassExists(ctx context.Context, shim *kwasmv1. return true, nil } } else { - log.Debug().Msg("RuntimeClass not defined") + log.Debug().Msg("Shim.Spec.RuntimeClass not defined") return false, nil } } -// findRuntimeClass finds a RuntimeClass. -func (sr *ShimReconciler) findRuntimeClass(ctx context.Context, shim *kwasmv1.Shim) (*nodev1.RuntimeClass, error) { +// getRuntimeClass finds a RuntimeClass. +func (sr *ShimReconciler) getRuntimeClass(ctx context.Context, shim *kwasmv1.Shim) (*nodev1.RuntimeClass, error) { rc := nodev1.RuntimeClass{} err := sr.Client.Get(ctx, types.NamespacedName{Name: shim.Spec.RuntimeClass.Name, Namespace: shim.Namespace}, &rc) if err != nil { From 7816486408ff7a24fc29adfdf89f4c04dfcfe735 Mon Sep 17 00:00:00 2001 From: Christoph Voigt Date: Sat, 3 Feb 2024 00:34:34 +0100 Subject: [PATCH 10/10] add shortcut to deploy sample shims to makefile --- Makefile | 12 ++++++++---- config/samples/kustomization.yaml | 5 ++++- config/samples/runtime_v1alpha1_shim.yaml | 12 ------------ config/samples/test_shim_lunatic.yaml | 6 ++++++ config/samples/test_shim_slight.yaml | 6 ++++++ config/samples/test_shim_spin.yaml | 6 ++++++ config/samples/test_shim_wws.yaml | 6 ++++++ 7 files changed, 36 insertions(+), 17 deletions(-) delete mode 100644 config/samples/runtime_v1alpha1_shim.yaml diff --git a/Makefile b/Makefile index 6e7bc5a..516c713 100644 --- a/Makefile +++ b/Makefile @@ -138,10 +138,6 @@ endif install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config. $(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f - -.PHONY: install-test-shims -install-test-shims: ## Install test shims from ./hack/. 
- kubectl apply -f ./hack/*.yaml - .PHONY: uninstall uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. $(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - @@ -151,6 +147,14 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - +.PHONY: deploy-samples +deploy-samples: ## Install test shims from ./hack/. + $(KUSTOMIZE) build config/samples | $(KUBECTL) apply -f - + +.PHONY: undeploy-samples +undeploy-samples: ## Install test shims from ./hack/. + $(KUSTOMIZE) build config/samples | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + .PHONY: undeploy undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. 
$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index f38bc69..d82d2f6 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -1,4 +1,7 @@ ## Append samples of your project ## resources: -- runtime_v1alpha1_shim.yaml +- test_shim_lunatic.yaml +- test_shim_slight.yaml +- test_shim_spin.yaml +- test_shim_wws.yaml #+kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/samples/runtime_v1alpha1_shim.yaml b/config/samples/runtime_v1alpha1_shim.yaml deleted file mode 100644 index 5e0d1ff..0000000 --- a/config/samples/runtime_v1alpha1_shim.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: runtime.kwasm.sh/v1alpha1 -kind: Shim -metadata: - labels: - app.kubernetes.io/name: shim - app.kubernetes.io/instance: shim-sample - app.kubernetes.io/part-of: kwasm-operator - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/created-by: kwasm-operator - name: shim-sample -spec: - # TODO(user): Add fields here diff --git a/config/samples/test_shim_lunatic.yaml b/config/samples/test_shim_lunatic.yaml index 0bcedfe..7d75142 100644 --- a/config/samples/test_shim_lunatic.yaml +++ b/config/samples/test_shim_lunatic.yaml @@ -2,6 +2,12 @@ apiVersion: runtime.kwasm.sh/v1alpha1 kind: Shim metadata: name: lunatic-v1 + labels: + app.kubernetes.io/name: lunatic-v1 + app.kubernetes.io/instance: lunatic-v1 + app.kubernetes.io/part-of: kwasm-operator + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: kwasm-operator spec: nodeSelector: lunatic: "true" diff --git a/config/samples/test_shim_slight.yaml b/config/samples/test_shim_slight.yaml index 6b87511..7cb031f 100644 --- a/config/samples/test_shim_slight.yaml +++ b/config/samples/test_shim_slight.yaml @@ -2,6 +2,12 @@ apiVersion: runtime.kwasm.sh/v1alpha1 kind: Shim metadata: name: slight-v1 + labels: + app.kubernetes.io/name: slight-v1 + 
app.kubernetes.io/instance: slight-v1 + app.kubernetes.io/part-of: kwasm-operator + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: kwasm-operator spec: nodeSelector: slight: "true" diff --git a/config/samples/test_shim_spin.yaml b/config/samples/test_shim_spin.yaml index 62443a8..d4c5771 100644 --- a/config/samples/test_shim_spin.yaml +++ b/config/samples/test_shim_spin.yaml @@ -2,6 +2,12 @@ apiVersion: runtime.kwasm.sh/v1alpha1 kind: Shim metadata: name: wasmtime-spin-v2 + labels: + app.kubernetes.io/name: wasmtime-spin-v2 + app.kubernetes.io/instance: wasmtime-spin-v2 + app.kubernetes.io/part-of: kwasm-operator + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: kwasm-operator spec: nodeSelector: spin: "true" diff --git a/config/samples/test_shim_wws.yaml b/config/samples/test_shim_wws.yaml index f7a8eed..135d95d 100644 --- a/config/samples/test_shim_wws.yaml +++ b/config/samples/test_shim_wws.yaml @@ -2,6 +2,12 @@ apiVersion: runtime.kwasm.sh/v1alpha1 kind: Shim metadata: name: wws-v1 + labels: + app.kubernetes.io/name: wws-v1 + app.kubernetes.io/instance: wws-v1 + app.kubernetes.io/part-of: kwasm-operator + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: kwasm-operator spec: nodeSelector: wws: "true"