From acba7a9b1ec99263734e62ed76a46314ad25c47c Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 21 Sep 2023 09:37:43 -0700 Subject: [PATCH] fix: Allow specifying root volume in block device mappings (#4457) Co-authored-by: Hao Luo --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 5 ++ pkg/apis/v1beta1/ec2nodeclass.go | 3 + pkg/apis/v1beta1/ec2nodeclass_validation.go | 7 +++ pkg/apis/v1beta1/suite_test.go | 28 ++++++++++ pkg/providers/instancetype/types.go | 6 ++ .../launchtemplate/nodeclass_test.go | 55 +++++++++++++++++-- 6 files changed, 99 insertions(+), 5 deletions(-) diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 54c40a91352c..71fd0962605a 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -149,6 +149,11 @@ spec: in the Amazon Elastic Compute Cloud User Guide. type: string type: object + rootVolume: + description: RootVolume is a flag indicating if this device + is mounted as kubelet root dir. You can configure at most + one root volume in BlockDeviceMappings. + type: boolean type: object type: array context: diff --git a/pkg/apis/v1beta1/ec2nodeclass.go b/pkg/apis/v1beta1/ec2nodeclass.go index 68170c886419..74c4ce16e80c 100644 --- a/pkg/apis/v1beta1/ec2nodeclass.go +++ b/pkg/apis/v1beta1/ec2nodeclass.go @@ -206,6 +206,9 @@ type BlockDeviceMapping struct { // EBS contains parameters used to automatically set up EBS volumes when an instance is launched. // +optional EBS *BlockDevice `json:"ebs,omitempty"` + // RootVolume is a flag indicating if this device is mounted as kubelet root dir. You can + // configure at most one root volume in BlockDeviceMappings. + RootVolume bool `json:"rootVolume,omitempty"` } type BlockDevice struct { diff --git a/pkg/apis/v1beta1/ec2nodeclass_validation.go b/pkg/apis/v1beta1/ec2nodeclass_validation.go index 4f52cc3f3037..d3f282a3b11c 100644 --- a/pkg/apis/v1beta1/ec2nodeclass_validation.go +++ b/pkg/apis/v1beta1/ec2nodeclass_validation.go @@ -194,10 +194,17 @@ func (in *EC2NodeClassSpec) validateStringEnum(value, field string, validValues } func (in *EC2NodeClassSpec) validateBlockDeviceMappings() (errs *apis.FieldError) { + numRootVolume := 0 for i, blockDeviceMapping := range in.BlockDeviceMappings { if err := in.validateBlockDeviceMapping(blockDeviceMapping); err != nil { errs = errs.Also(err.ViaFieldIndex(blockDeviceMappingsPath, i)) } + if blockDeviceMapping.RootVolume { + numRootVolume++ + } + } + if numRootVolume > 1 { + errs = errs.Also(apis.ErrMultipleOneOf("more than 1 root volume configured")) } return errs } diff --git a/pkg/apis/v1beta1/suite_test.go b/pkg/apis/v1beta1/suite_test.go index b155f262624a..c1e6387d87b3 100644 --- a/pkg/apis/v1beta1/suite_test.go +++ b/pkg/apis/v1beta1/suite_test.go @@ -23,6 +23,7 @@ import ( "github.com/imdario/mergo" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" . "knative.dev/pkg/logging/testing" @@ -534,4 +535,31 @@ var _ = Describe("Validation", func() { Expect(nodeClass.Hash()).To(Equal(otherNodeClass.Hash())) }) }) + Context("EC2NodeClass BlockDeviceMapping", func() { + It("should fail if more than one root volume is specified", func() { + nodeClass := test.EC2NodeClass(v1beta1.EC2NodeClass{ + Spec: v1beta1.EC2NodeClassSpec{ + BlockDeviceMappings: []*v1beta1.BlockDeviceMapping{ + { + DeviceName: aws.String("map-device-1"), + EBS: &v1beta1.BlockDevice{ + VolumeSize: resource.NewScaledQuantity(50, resource.Giga), + }, + + RootVolume: true, + }, + { + DeviceName: aws.String("map-device-2"), + EBS: &v1beta1.BlockDevice{ + VolumeSize: resource.NewScaledQuantity(50, resource.Giga), + }, + + RootVolume: true, + }, + }, + }, + }) + Expect(nodeClass.Validate(ctx)).To(Not(Succeed())) + }) + }) }) diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index 30e4364b82ad..a126ac84615d 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -208,6 +208,12 @@ func memory(ctx context.Context, info *ec2.InstanceTypeInfo) *resource.Quantity // Setting ephemeral-storage to be either the default value or what is defined in blockDeviceMappings func ephemeralStorage(amiFamily amifamily.AMIFamily, blockDeviceMappings []*v1beta1.BlockDeviceMapping) *resource.Quantity { if len(blockDeviceMappings) != 0 { + // First check if there's a root volume configured in blockDeviceMappings. + if blockDeviceMapping, ok := lo.Find(blockDeviceMappings, func(bdm *v1beta1.BlockDeviceMapping) bool { + return bdm.RootVolume + }); ok && blockDeviceMapping.EBS.VolumeSize != nil { + return blockDeviceMapping.EBS.VolumeSize + } switch amiFamily.(type) { case *amifamily.Custom: // We can't know if a custom AMI is going to have a volume size. diff --git a/pkg/providers/launchtemplate/nodeclass_test.go b/pkg/providers/launchtemplate/nodeclass_test.go index 4418ab7ee619..ca58edf4c3da 100644 --- a/pkg/providers/launchtemplate/nodeclass_test.go +++ b/pkg/providers/launchtemplate/nodeclass_test.go @@ -586,12 +586,20 @@ var _ = Describe("EC2NodeClass/LaunchTemplates", func() { ExpectNotScheduled(ctx, env.Client, pod) }) It("should pack pods using the blockdevicemappings from the provider spec when defined", func() { - nodeClass.Spec.BlockDeviceMappings = []*v1beta1.BlockDeviceMapping{{ - DeviceName: aws.String("/dev/xvda"), - EBS: &v1beta1.BlockDevice{ - VolumeSize: resource.NewScaledQuantity(50, resource.Giga), + nodeClass.Spec.BlockDeviceMappings = []*v1beta1.BlockDeviceMapping{ + { + DeviceName: aws.String("/dev/xvda"), + EBS: &v1beta1.BlockDevice{ + VolumeSize: resource.NewScaledQuantity(50, resource.Giga), + }, + }, + { + DeviceName: aws.String("/dev/xvdb"), + EBS: &v1beta1.BlockDevice{ + VolumeSize: resource.NewScaledQuantity(20, resource.Giga), + }, }, - }} + } ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod(coretest.PodOptions{ResourceRequirements: v1.ResourceRequirements{ Requests: map[v1.ResourceName]resource.Quantity{ @@ -631,6 +639,43 @@ var _ = Describe("EC2NodeClass/LaunchTemplates", func() { }) ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + // capacity isn't recorded on the node any longer, but we know the pod should schedule + ExpectScheduled(ctx, env.Client, pod) + }) + It("should pack pods using the configured root volume in blockdevicemappings", func() { + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} + nodeClass.Spec.BlockDeviceMappings = []*v1beta1.BlockDeviceMapping{ + { + DeviceName: aws.String("/dev/xvda"), + EBS: &v1beta1.BlockDevice{ + VolumeSize: resource.NewScaledQuantity(20, resource.Giga), + }, + }, + { + DeviceName: aws.String("/dev/xvdb"), + EBS: &v1beta1.BlockDevice{ + VolumeSize: resource.NewScaledQuantity(40, resource.Giga), + }, + RootVolume: true, + }, + { + DeviceName: aws.String("/dev/xvdc"), + EBS: &v1beta1.BlockDevice{ + VolumeSize: resource.NewScaledQuantity(20, resource.Giga), + }, + }, + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + pod := coretest.UnschedulablePod(coretest.PodOptions{ResourceRequirements: v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + // this pod can only be satisfied if `/dev/xvdb` will house all the pods. + v1.ResourceEphemeralStorage: resource.MustParse("25Gi"), + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + // capacity isn't recorded on the node any longer, but we know the pod should schedule ExpectScheduled(ctx, env.Client, pod) })