Skip to content

Commit

Permalink
Add support for tolerations in operator (#1648)
Browse files Browse the repository at this point in the history
Summary: Adds support for tolerations to enable users to deploy PEMs to
nodes that have taints through the CLI.

Relevant Issues: #598

Type of change: /kind feature

Test Plan: Used skaffold to deploy an unreleased version of the
operator. Verified that tolerations parameter actually gets passed
through to the pod spec.

Signed-off-by: Benjamin Kilimnik <[email protected]>
  • Loading branch information
benkilimnik authored Aug 4, 2023
1 parent c6a954a commit 6796db6
Show file tree
Hide file tree
Showing 8 changed files with 701 additions and 74 deletions.
1 change: 1 addition & 0 deletions .arclint
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"(^private\/credentials\/.*\\.sh)",
"(^private\/credentials\/.*\\.yaml)",
"(^src/operator/client/versioned/)",
"(^src/operator/apis/px.dev/v1alpha1/zz_generated.deepcopy.go)",
"(^src/stirling/bpf_tools/bcc_bpf/system-headers)",
"(^src/stirling/mysql/testing/.*\\.json$)",
"(^src/stirling/obj_tools/testdata/go/test_go_binary.go)",
Expand Down
43 changes: 43 additions & 0 deletions k8s/operator/crd/base/px.dev_viziers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,49 @@ spec:
format: int64
type: integer
type: object
tolerations:
description: 'Tolerations allows scheduling pods on nodes with
matching taints. More info: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/:
This field cannot be updated once the cluster is created.'
items:
description: The pod this Toleration is attached to tolerates
any taint that matches the triple <key,value,effect> using
the matching operator <operator>.
properties:
effect:
description: Effect indicates the taint effect to match.
Empty means match all taint effects. When specified, allowed
values are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: Key is the taint key that the toleration applies
to. Empty means match all taint keys. If the key is empty,
operator must be Exists; this combination means to match
all values and all keys.
type: string
operator:
description: Operator represents a key's relationship to
the value. Valid operators are Exists and Equal. Defaults
to Equal. Exists is equivalent to wildcard for value,
so that a pod can tolerate all taints of a particular
category.
type: string
tolerationSeconds:
description: TolerationSeconds represents the period of
time the toleration (which must be of effect NoExecute,
otherwise this field is ignored) tolerates the taint.
By default, it is not set, which means tolerate the taint
forever (do not evict). Zero and negative values will
be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: Value is the taint value the toleration matches
to. If the operator is Exists, the value should be empty,
otherwise just a regular string.
type: string
type: object
type: array
type: object
registry:
description: 'Registry specifies the image registry to use rather
Expand Down
679 changes: 607 additions & 72 deletions src/api/proto/vizierconfigpb/vizier_types.pb.go

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions src/api/proto/vizierconfigpb/vizier_types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ package px.vizierconfigpb;
option go_package = "vizierconfigpb";

import "github.com/gogo/protobuf/gogoproto/gogo.proto";
import "google/protobuf/wrappers.proto";

// Note: Any changes to this file should be kept in sync with src/operator/apis/px.dev/v1alpha1/vizier_types.go.
// VizierSpec defines the desired state of Vizier
Expand Down Expand Up @@ -90,6 +91,9 @@ message PodPolicyReq {
// NodeSelector is a selector which must be true for the pod to fit on a node.
// This field cannot be updated once the cluster is created.
map<string, string> nodeSelector = 4;
// Tolerations allows scheduling pods on nodes with matching taints.
// This field cannot be updated once the cluster is created.
repeated Toleration tolerations = 5;
}

// ResourceReqs is copied from the k8s api:
Expand All @@ -100,6 +104,16 @@ message ResourceReqs {
ResourceList requests = 2;
}

// Toleration is copied from the k8s api:
// https://pkg.go.dev/k8s.io/api/core/v1#Toleration
// The pod a Toleration is attached to tolerates any taint that matches the
// triple <key,value,effect> using the matching operator <operator>.
message Toleration {
// The taint key that the toleration applies to. Empty means match all taint
// keys; if the key is empty, operator must be Exists.
string key = 1;
// The key's relationship to the value. Valid operators are Exists and Equal.
// Exists is equivalent to a wildcard for value.
string operator = 2;
// The taint value the toleration matches to. If the operator is Exists, the
// value should be empty.
string value = 3;
// The taint effect to match. Empty means match all taint effects. When
// specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
string effect = 4;
// The period of time the toleration tolerates the taint (only relevant for
// effect NoExecute). Left unset when the k8s toleration has no
// tolerationSeconds, which means tolerate the taint forever.
google.protobuf.Int64Value toleration_seconds = 5;
}

// ResourceList is copied from the k8s api: https://pkg.go.dev/k8s.io/api/core/v1#ResourceList
message ResourceList {
map<string, ResourceQuantity> resource_list = 1;
Expand Down
4 changes: 4 additions & 0 deletions src/operator/apis/px.dev/v1alpha1/vizier_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ type PodPolicy struct {
// More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
// This field cannot be updated once the cluster is created.
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
// Tolerations allows scheduling pods on nodes with matching taints.
// More info: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
// This field cannot be updated once the cluster is created.
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
// The securityContext which should be set on non-privileged pods. All pods which require privileged permissions
// will still require a privileged securityContext.
SecurityContext *PodSecurityContext `json:"securityContext,omitempty"`
Expand Down
8 changes: 8 additions & 0 deletions src/operator/apis/px.dev/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/operator/controllers/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ go_library(
"//src/utils/shared/k8s",
"@com_github_blang_semver//:semver",
"@com_github_cenkalti_backoff_v4//:backoff",
"@com_github_gogo_protobuf//types",
"@com_github_sirupsen_logrus//:logrus",
"@io_k8s_api//apps/v1:apps",
"@io_k8s_api//core/v1:core",
Expand Down
25 changes: 23 additions & 2 deletions src/operator/controllers/vizier_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (

"github.com/blang/semver"
"github.com/cenkalti/backoff/v4"
"github.com/gogo/protobuf/types"
log "github.com/sirupsen/logrus"
"google.golang.org/grpc"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -784,7 +785,7 @@ func updateResourceConfiguration(resource *k8s.Resource, vz *v1alpha1.Vizier) er
addKeyValueMapToResource("labels", vz.Spec.Pod.Labels, resource.Object.Object)
addKeyValueMapToResource("annotations", vz.Spec.Pod.Annotations, resource.Object.Object)
updateResourceRequirements(vz.Spec.Pod.Resources, resource.Object.Object)
updatePodSpec(vz.Spec.Pod.NodeSelector, vz.Spec.Pod.SecurityContext, resource.Object.Object)
updatePodSpec(vz.Spec.Pod.NodeSelector, vz.Spec.Pod.Tolerations, vz.Spec.Pod.SecurityContext, resource.Object.Object)
return nil
}

Expand Down Expand Up @@ -830,6 +831,7 @@ func generateVizierYAMLsConfig(ctx context.Context, ns string, k8sVersion string
Requests: convertResourceType(vz.Spec.Pod.Resources.Requests),
},
NodeSelector: vz.Spec.Pod.NodeSelector,
Tolerations: convertTolerations(vz.Spec.Pod.Tolerations),
},
Patches: vz.Spec.Patches,
Registry: vz.Spec.Registry,
Expand Down Expand Up @@ -956,7 +958,25 @@ func updateResourceRequirements(requirements v1.ResourceRequirements, res map[st
castedContainer["resources"] = resources
}
}
func updatePodSpec(nodeSelector map[string]string, securityCtx *v1alpha1.PodSecurityContext, res map[string]interface{}) {

// convertTolerations translates k8s core/v1 tolerations into their
// vizierconfigpb counterparts, preserving order. Operator and Effect are
// carried over as plain strings. TolerationSeconds is only populated on the
// output when it is set on the input, so "unset" stays distinguishable from 0.
// A nil or empty input yields a nil slice.
func convertTolerations(tolerations []v1.Toleration) []*vizierconfigpb.Toleration {
	var converted []*vizierconfigpb.Toleration
	for i := range tolerations {
		src := &tolerations[i]

		// Wrap the optional seconds value only when the source carries one.
		var seconds *types.Int64Value
		if src.TolerationSeconds != nil {
			seconds = &types.Int64Value{Value: *src.TolerationSeconds}
		}

		converted = append(converted, &vizierconfigpb.Toleration{
			Key:               src.Key,
			Operator:          string(src.Operator),
			Value:             src.Value,
			Effect:            string(src.Effect),
			TolerationSeconds: seconds,
		})
	}
	return converted
}

func updatePodSpec(nodeSelector map[string]string, tolerations []v1.Toleration, securityCtx *v1alpha1.PodSecurityContext, res map[string]interface{}) {
podSpec := make(map[string]interface{})
md, ok, err := unstructured.NestedFieldNoCopy(res, "spec", "template", "spec")
if ok && err == nil {
Expand All @@ -977,6 +997,7 @@ func updatePodSpec(nodeSelector map[string]string, securityCtx *v1alpha1.PodSecu
castedNodeSelector[k] = v
}
podSpec["nodeSelector"] = castedNodeSelector
podSpec["tolerations"] = tolerations

// Add securityContext only if enabled.
if securityCtx == nil || !securityCtx.Enabled {
Expand Down

0 comments on commit 6796db6

Please sign in to comment.