Skip to content

Commit

Permalink
feat: add support for mounted instance-store ephemeral storage (#4735)
Browse files Browse the repository at this point in the history
  • Loading branch information
alec-rabold authored Dec 14, 2023
1 parent 0492fea commit 571e0fb
Show file tree
Hide file tree
Showing 19 changed files with 235 additions and 30 deletions.
50 changes: 50 additions & 0 deletions examples/v1beta1/instance-store-ephemeral-storage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# This example NodePool will provision AL2 instances with
# local NVMe instance-store disks used for node ephemeral storage.
---
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
name: default
annotations:
kubernetes.io/description: "General purpose NodePool for generic workloads"
spec:
template:
spec:
# Each requirement constrains which instance types this NodePool may launch.
requirements:
- key: kubernetes.io/arch
operator: In
values: ["amd64"]
- key: kubernetes.io/os
operator: In
values: ["linux"]
- key: karpenter.sh/capacity-type
operator: In
values: ["on-demand"]
- key: karpenter.k8s.aws/instance-category
operator: In
values: ["c", "m", "r"]
# Only instance generations greater than 2.
- key: karpenter.k8s.aws/instance-generation
operator: Gt
values: ["2"]
# Only instance types whose instance-local-nvme label value is greater than 300,
# so that every candidate has local NVMe instance-store disks.
# NOTE(review): the label value is presumably the total local NVMe size in GB/GiB - confirm
# against the Karpenter well-known labels documentation.
- key: karpenter.k8s.aws/instance-local-nvme
operator: Gt
values: ["300"]
# Must match metadata.name of the EC2NodeClass defined later in this file.
nodeClassRef:
name: instance-store-ephemeral-storage
---
apiVersion: karpenter.k8s.aws/v1beta1
kind: EC2NodeClass
metadata:
# Referenced by the NodePool's spec.template.spec.nodeClassRef.name.
name: instance-store-ephemeral-storage
annotations:
kubernetes.io/description: "EC2NodeClass to provision nodes with instance-store ephemeral storage"
spec:
amiFamily: AL2 # Amazon Linux 2
# Use the local NVMe instance-store disks for node ephemeral storage.
# "RAID0" is the value this example relies on for that behavior.
instanceStorePolicy: "RAID0"
role: "KarpenterNodeRole-${CLUSTER_NAME}" # replace with your cluster name
# Subnets and security groups are selected by the karpenter.sh/discovery tag.
subnetSelectorTerms:
- tags:
karpenter.sh/discovery: "${CLUSTER_NAME}" # replace with your cluster name
securityGroupSelectorTerms:
- tags:
karpenter.sh/discovery: "${CLUSTER_NAME}" # replace with your cluster name
6 changes: 6 additions & 0 deletions pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ spec:
x-kubernetes-validations:
- message: instanceProfile cannot be empty
rule: self != ''
instanceStorePolicy:
description: InstanceStorePolicy specifies how to handle instance-store
disks.
enum:
- RAID0
type: string
metadataOptions:
default:
httpEndpoint: enabled
Expand Down
15 changes: 15 additions & 0 deletions pkg/apis/v1beta1/ec2nodeclass.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ type EC2NodeClassSpec struct {
// +kubebuilder:validation:MaxItems:=50
// +optional
BlockDeviceMappings []*BlockDeviceMapping `json:"blockDeviceMappings,omitempty"`
// InstanceStorePolicy specifies how to handle instance-store disks.
// +optional
InstanceStorePolicy *InstanceStorePolicy `json:"instanceStorePolicy,omitempty"`
// DetailedMonitoring controls if detailed monitoring is enabled for instances that are launched
// +optional
DetailedMonitoring *bool `json:"detailedMonitoring,omitempty"`
Expand Down Expand Up @@ -295,6 +298,18 @@ type BlockDevice struct {
VolumeType *string `json:"volumeType,omitempty"`
}

// InstanceStorePolicy enumerates options for configuring instance store disks.
// +kubebuilder:validation:Enum={RAID0}
type InstanceStorePolicy string

const (
// InstanceStorePolicyRAID0 configures a RAID-0 array that includes all ephemeral NVMe instance storage disks.
// The containerd and kubelet state directories (`/var/lib/containerd` and `/var/lib/kubelet`) are then placed on
// that array, which gives the node larger and faster ephemeral-storage. The node's ephemeral storage is shared
// between pods that request ephemeral storage and container images that are downloaded to the node.
InstanceStorePolicyRAID0 InstanceStorePolicy = "RAID0"
)

// EC2NodeClass is the Schema for the EC2NodeClass API
// +kubebuilder:object:root=true
// +kubebuilder:resource:path=ec2nodeclasses,scope=Cluster,categories=karpenter,shortName={ec2nc,ec2ncs}
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions pkg/fake/ec2api.go
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,18 @@ func (e *EC2API) DescribeInstanceTypeOfferingsWithContext(_ context.Context, _ *
InstanceType: aws.String("m5.metal"),
Location: aws.String("test-zone-1c"),
},
{
InstanceType: aws.String("m6idn.32xlarge"),
Location: aws.String("test-zone-1a"),
},
{
InstanceType: aws.String("m6idn.32xlarge"),
Location: aws.String("test-zone-1b"),
},
{
InstanceType: aws.String("m6idn.32xlarge"),
Location: aws.String("test-zone-1c"),
},
},
}, nil
}
Expand Down
17 changes: 9 additions & 8 deletions pkg/providers/amifamily/al2.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,19 @@ func (a AL2) DefaultAMIs(version string) []DefaultAMIOutput {
// even if elements of those inputs are in differing orders,
// guaranteeing it won't cause spurious hash differences.
// AL2 userdata also works on Ubuntu
func (a AL2) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string) bootstrap.Bootstrapper {
func (a AL2) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string, instanceStorePolicy *v1beta1.InstanceStorePolicy) bootstrap.Bootstrapper {
containerRuntime := aws.String("containerd")
return bootstrap.EKS{
ContainerRuntime: *containerRuntime,
Options: bootstrap.Options{
ClusterName: a.Options.ClusterName,
ClusterEndpoint: a.Options.ClusterEndpoint,
KubeletConfig: kubeletConfig,
Taints: taints,
Labels: labels,
CABundle: caBundle,
CustomUserData: customUserData,
ClusterName: a.Options.ClusterName,
ClusterEndpoint: a.Options.ClusterEndpoint,
KubeletConfig: kubeletConfig,
Taints: taints,
Labels: labels,
CABundle: caBundle,
CustomUserData: customUserData,
InstanceStorePolicy: instanceStorePolicy,
},
}
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/providers/amifamily/bootstrap/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import (

corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
"sigs.k8s.io/karpenter/pkg/utils/resources"

"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"
)

// Options is the node bootstrapping parameters passed from Karpenter to the provisioning node
Expand All @@ -39,6 +41,7 @@ type Options struct {
AWSENILimitedPodDensity bool
ContainerRuntime *string
CustomUserData *string
InstanceStorePolicy *v1beta1.InstanceStorePolicy
}

func (o Options) kubeletExtraArgs() (args []string) {
Expand Down
5 changes: 5 additions & 0 deletions pkg/providers/amifamily/bootstrap/eksbootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (
"strings"

"github.com/samber/lo"

"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"
)

type EKS struct {
Expand Down Expand Up @@ -78,6 +80,9 @@ func (e EKS) eksBootstrapScript() string {
if args := e.kubeletExtraArgs(); len(args) > 0 {
userData.WriteString(fmt.Sprintf(" \\\n--kubelet-extra-args '%s'", strings.Join(args, " ")))
}
if lo.FromPtr(e.InstanceStorePolicy) == v1beta1.InstanceStorePolicyRAID0 {
userData.WriteString(" \\\n--local-disks raid0")
}
return userData.String()
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/amifamily/bottlerocket.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (b Bottlerocket) DefaultAMIs(version string) []DefaultAMIOutput {
}

// UserData returns the default userdata script for the AMI Family
func (b Bottlerocket) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string) bootstrap.Bootstrapper {
func (b Bottlerocket) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string, _ *v1beta1.InstanceStorePolicy) bootstrap.Bootstrapper {
return bootstrap.Bottlerocket{
Options: bootstrap.Options{
ClusterName: b.Options.ClusterName,
Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/amifamily/custom.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type Custom struct {
}

// UserData returns the default userdata script for the AMI Family
func (c Custom) UserData(_ *corev1beta1.KubeletConfiguration, _ []v1.Taint, _ map[string]string, _ *string, _ []*cloudprovider.InstanceType, customUserData *string) bootstrap.Bootstrapper {
func (c Custom) UserData(_ *corev1beta1.KubeletConfiguration, _ []v1.Taint, _ map[string]string, _ *string, _ []*cloudprovider.InstanceType, customUserData *string, _ *v1beta1.InstanceStorePolicy) bootstrap.Bootstrapper {
return bootstrap.Custom{
Options: bootstrap.Options{
CustomUserData: customUserData,
Expand Down
12 changes: 7 additions & 5 deletions pkg/providers/amifamily/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,11 @@ type Resolver struct {

// Options define the static launch template parameters
type Options struct {
ClusterName string
ClusterEndpoint string
InstanceProfile string
CABundle *string `hash:"ignore"`
ClusterName string
ClusterEndpoint string
InstanceProfile string
CABundle *string `hash:"ignore"`
InstanceStorePolicy *v1beta1.InstanceStorePolicy
// Level-triggered fields that may change out of sync.
SecurityGroups []v1beta1.SecurityGroup
Tags map[string]string
Expand All @@ -75,7 +76,7 @@ type LaunchTemplate struct {
// AMIFamily can be implemented to override the default logic for generating dynamic launch template parameters
type AMIFamily interface {
DefaultAMIs(version string) []DefaultAMIOutput
UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string, instanceTypes []*cloudprovider.InstanceType, customUserData *string) bootstrap.Bootstrapper
UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []core.Taint, labels map[string]string, caBundle *string, instanceTypes []*cloudprovider.InstanceType, customUserData *string, instanceStorePolicy *v1beta1.InstanceStorePolicy) bootstrap.Bootstrapper
DefaultBlockDeviceMappings() []*v1beta1.BlockDeviceMapping
DefaultMetadataOptions() *v1beta1.MetadataOptions
EphemeralBlockDevice() *string
Expand Down Expand Up @@ -224,6 +225,7 @@ func (r Resolver) resolveLaunchTemplate(nodeClass *v1beta1.EC2NodeClass, nodeCla
options.CABundle,
instanceTypes,
nodeClass.Spec.UserData,
options.InstanceStorePolicy,
),
BlockDeviceMappings: nodeClass.Spec.BlockDeviceMappings,
MetadataOptions: nodeClass.Spec.MetadataOptions,
Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/amifamily/ubuntu.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (u Ubuntu) DefaultAMIs(version string) []DefaultAMIOutput {
}

// UserData returns the default userdata script for the AMI Family
func (u Ubuntu) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string) bootstrap.Bootstrapper {
func (u Ubuntu) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string, _ *v1beta1.InstanceStorePolicy) bootstrap.Bootstrapper {
return bootstrap.EKS{
Options: bootstrap.Options{
ClusterName: u.Options.ClusterName,
Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/amifamily/windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (w Windows) DefaultAMIs(version string) []DefaultAMIOutput {
}

// UserData returns the default userdata script for the AMI Family
func (w Windows) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string) bootstrap.Bootstrapper {
func (w Windows) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string, _ *v1beta1.InstanceStorePolicy) bootstrap.Bootstrapper {
return bootstrap.Windows{
Options: bootstrap.Options{
ClusterName: w.Options.ClusterName,
Expand Down
23 changes: 23 additions & 0 deletions pkg/providers/instancetype/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,29 @@ var _ = Describe("InstanceTypes", func() {
}
Expect(nodes.Len()).To(Equal(1))
})
// Negative case: nodeClass is applied as provided by the suite setup, without this spec
// setting an InstanceStorePolicy, and the pod asks for 5000Gi of ephemeral storage.
It("should not launch instances w/ instance storage for ephemeral storage resource requests when exceeding blockDeviceMapping", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
pod := coretest.UnschedulablePod(coretest.PodOptions{
ResourceRequirements: v1.ResourceRequirements{
// Presumably larger than any ephemeral-storage capacity derived from the
// node class's block device mappings - verify against the suite fixtures.
Requests: v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("5000Gi")},
},
})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
// No instance type should be able to satisfy the request, so the pod stays unscheduled.
ExpectNotScheduled(ctx, env.Client, pod)
})
// Positive case: the same 5000Gi ephemeral-storage request becomes schedulable once the
// node class opts into the RAID0 instance store policy.
It("should launch instances w/ instance storage for ephemeral storage resource requests when disks are mounted for ephemeral-storage", func() {
// Set the policy before applying so the applied node class carries it.
nodeClass.Spec.InstanceStorePolicy = lo.ToPtr(v1beta1.InstanceStorePolicyRAID0)
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
pod := coretest.UnschedulablePod(coretest.PodOptions{
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("5000Gi")},
},
})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
node := ExpectScheduled(ctx, env.Client, pod)
// The pod must land on m6idn.32xlarge, and the node must report 7600G of ephemeral storage.
// NOTE(review): 7600G is presumably the total instance-store size the fake EC2 API
// reports for m6idn.32xlarge - confirm against the fake's instance type data.
Expect(node.Labels[v1.LabelInstanceTypeStable]).To(Equal("m6idn.32xlarge"))
Expect(*node.Status.Capacity.StorageEphemeral()).To(Equal(resource.MustParse("7600G")))
})
It("should not set pods to 110 if using ENI-based pod density", func() {
instanceInfo, err := awsEnv.InstanceTypesProvider.GetInstanceTypes(ctx)
Expect(err).To(BeNil())
Expand Down
26 changes: 16 additions & 10 deletions pkg/providers/instancetype/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ func NewInstanceType(ctx context.Context, info *ec2.InstanceTypeInfo, kc *corev1
Name: aws.StringValue(info.InstanceType),
Requirements: computeRequirements(info, offerings, region, amiFamily),
Offerings: offerings,
Capacity: computeCapacity(ctx, info, amiFamily, nodeClass.Spec.BlockDeviceMappings, kc),
Capacity: computeCapacity(ctx, info, amiFamily, nodeClass, kc),
Overhead: &cloudprovider.InstanceTypeOverhead{
KubeReserved: kubeReservedResources(cpu(info), pods(ctx, info, amiFamily, kc), ENILimitedPods(ctx, info), amiFamily, kc),
SystemReserved: systemReservedResources(kc),
EvictionThreshold: evictionThreshold(memory(ctx, info), ephemeralStorage(amiFamily, nodeClass.Spec.BlockDeviceMappings), amiFamily, kc),
EvictionThreshold: evictionThreshold(memory(ctx, info), ephemeralStorage(info, amiFamily, nodeClass), amiFamily, kc),
},
}
if it.Requirements.Compatible(scheduling.NewRequirements(scheduling.NewRequirement(v1.LabelOSStable, v1.NodeSelectorOpIn, string(v1.Windows)))) == nil {
Expand Down Expand Up @@ -169,12 +169,12 @@ func getArchitecture(info *ec2.InstanceTypeInfo) string {
}

func computeCapacity(ctx context.Context, info *ec2.InstanceTypeInfo, amiFamily amifamily.AMIFamily,
blockDeviceMappings []*v1beta1.BlockDeviceMapping, kc *corev1beta1.KubeletConfiguration) v1.ResourceList {
nodeClass *v1beta1.EC2NodeClass, kc *corev1beta1.KubeletConfiguration) v1.ResourceList {

resourceList := v1.ResourceList{
v1.ResourceCPU: *cpu(info),
v1.ResourceMemory: *memory(ctx, info),
v1.ResourceEphemeralStorage: *ephemeralStorage(amiFamily, blockDeviceMappings),
v1.ResourceEphemeralStorage: *ephemeralStorage(info, amiFamily, nodeClass),
v1.ResourcePods: *pods(ctx, info, amiFamily, kc),
v1beta1.ResourceAWSPodENI: *awsPodENI(aws.StringValue(info.InstanceType)),
v1beta1.ResourceNVIDIAGPU: *nvidiaGPUs(info),
Expand Down Expand Up @@ -202,23 +202,29 @@ func memory(ctx context.Context, info *ec2.InstanceTypeInfo) *resource.Quantity
return mem
}

// Setting ephemeral-storage to be either the default value or what is defined in blockDeviceMappings
func ephemeralStorage(amiFamily amifamily.AMIFamily, blockDeviceMappings []*v1beta1.BlockDeviceMapping) *resource.Quantity {
if len(blockDeviceMappings) != 0 {
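// ephemeralStorage returns the node's ephemeral-storage capacity. In order of precedence this is: the combined size of the instance-store volumes (when the node class uses the RAID0 instance store policy and the instance type reports instance storage), the volume size defined in blockDeviceMappings, or the default value.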
func ephemeralStorage(info *ec2.InstanceTypeInfo, amiFamily amifamily.AMIFamily, nodeClass *v1beta1.EC2NodeClass) *resource.Quantity {
// If local store disks have been configured for node ephemeral-storage, use the total size of the disks.
if lo.FromPtr(nodeClass.Spec.InstanceStorePolicy) == v1beta1.InstanceStorePolicyRAID0 {
if info.InstanceStorageInfo != nil && info.InstanceStorageInfo.TotalSizeInGB != nil {
return resources.Quantity(fmt.Sprintf("%dG", *info.InstanceStorageInfo.TotalSizeInGB))
}
}
if len(nodeClass.Spec.BlockDeviceMappings) != 0 {
// First check if there's a root volume configured in blockDeviceMappings.
if blockDeviceMapping, ok := lo.Find(blockDeviceMappings, func(bdm *v1beta1.BlockDeviceMapping) bool {
if blockDeviceMapping, ok := lo.Find(nodeClass.Spec.BlockDeviceMappings, func(bdm *v1beta1.BlockDeviceMapping) bool {
return bdm.RootVolume
}); ok && blockDeviceMapping.EBS.VolumeSize != nil {
return blockDeviceMapping.EBS.VolumeSize
}
switch amiFamily.(type) {
case *amifamily.Custom:
// We can't know if a custom AMI is going to have a volume size.
volumeSize := blockDeviceMappings[len(blockDeviceMappings)-1].EBS.VolumeSize
volumeSize := nodeClass.Spec.BlockDeviceMappings[len(nodeClass.Spec.BlockDeviceMappings)-1].EBS.VolumeSize
return lo.Ternary(volumeSize != nil, volumeSize, amifamily.DefaultEBS.VolumeSize)
default:
// If a block device mapping exists in the provider for the root volume, use the volume size specified in the provider. If not, use the default
if blockDeviceMapping, ok := lo.Find(blockDeviceMappings, func(bdm *v1beta1.BlockDeviceMapping) bool {
if blockDeviceMapping, ok := lo.Find(nodeClass.Spec.BlockDeviceMappings, func(bdm *v1beta1.BlockDeviceMapping) bool {
return *bdm.DeviceName == *amiFamily.EphemeralBlockDevice()
}); ok && blockDeviceMapping.EBS.VolumeSize != nil {
return blockDeviceMapping.EBS.VolumeSize
Expand Down
7 changes: 4 additions & 3 deletions pkg/providers/launchtemplate/launchtemplate.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,10 @@ func (p *Provider) createAMIOptions(ctx context.Context, nodeClass *v1beta1.EC2N
return nil, fmt.Errorf("no security groups exist given constraints")
}
options := &amifamily.Options{
ClusterName: options.FromContext(ctx).ClusterName,
ClusterEndpoint: p.ClusterEndpoint,
InstanceProfile: instanceProfile,
ClusterName: options.FromContext(ctx).ClusterName,
ClusterEndpoint: p.ClusterEndpoint,
InstanceProfile: instanceProfile,
InstanceStorePolicy: nodeClass.Spec.InstanceStorePolicy,
SecurityGroups: lo.Map(securityGroups, func(s *ec2.SecurityGroup, _ int) v1beta1.SecurityGroup {
return v1beta1.SecurityGroup{ID: aws.StringValue(s.GroupId), Name: aws.StringValue(s.GroupName)}
}),
Expand Down
Loading

0 comments on commit 571e0fb

Please sign in to comment.