Skip to content

Commit

Permalink
Use status values for AMI resolution
Browse files Browse the repository at this point in the history
  • Loading branch information
engedaam committed Apr 25, 2024
1 parent bc653f0 commit 5b2a3dc
Show file tree
Hide file tree
Showing 7 changed files with 248 additions and 51 deletions.
8 changes: 2 additions & 6 deletions pkg/cloudprovider/drift.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,10 @@ func (c *CloudProvider) isAMIDrifted(ctx context.Context, nodeClaim *corev1beta1
if !found {
return "", fmt.Errorf(`finding node instance type "%s"`, nodeClaim.Labels[v1.LabelInstanceTypeStable])
}
amis, err := c.amiProvider.Get(ctx, nodeClass, &amifamily.Options{})
if err != nil {
return "", fmt.Errorf("getting amis, %w", err)
}
if len(amis) == 0 {
if len(nodeClass.Status.AMIs) == 0 {
return "", fmt.Errorf("no amis exist given constraints")
}
mappedAMIs := amis.MapToInstanceTypes([]*cloudprovider.InstanceType{nodeInstanceType})
mappedAMIs := amifamily.MapToInstanceTypes([]*cloudprovider.InstanceType{nodeInstanceType}, nodeClass.Status.AMIs)
if !lo.Contains(lo.Keys(mappedAMIs), instance.ImageID) {
return AMIDrift, nil
}
Expand Down
76 changes: 76 additions & 0 deletions pkg/cloudprovider/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import (
"sigs.k8s.io/karpenter/pkg/events"
coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
"sigs.k8s.io/karpenter/pkg/operator/scheme"
"sigs.k8s.io/karpenter/pkg/scheduling"
coretest "sigs.k8s.io/karpenter/pkg/test"

. "github.com/onsi/ginkgo/v2"
Expand Down Expand Up @@ -138,6 +139,40 @@ var _ = Describe("CloudProvider", func() {
},
},
})
amdRequirements := scheduling.NewRequirements()
amdRequirements.Add(scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureArm64))
nodeClass.Status.AMIs = []v1beta1.AMI{
{
ID: "ami-test1",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1beta1.LabelInstanceGPUCount, v1.NodeSelectorOpDoesNotExist),
scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpDoesNotExist),
).NodeSelectorRequirements(),
},
{
ID: "ami-test2",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1beta1.LabelInstanceGPUCount, v1.NodeSelectorOpExists),
).NodeSelectorRequirements(),
},
{
ID: "ami-test3",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpExists),
).NodeSelectorRequirements(),
},
{
ID: "ami-test4",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureArm64),
scheduling.NewRequirement(v1beta1.LabelInstanceGPUCount, v1.NodeSelectorOpDoesNotExist),
scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpDoesNotExist),
).NodeSelectorRequirements(),
},
}
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed())
})
Expand Down Expand Up @@ -586,6 +621,20 @@ var _ = Describe("CloudProvider", func() {
},
},
})
nodeClass.Status.AMIs = []v1beta1.AMI{
{
ID: armAMIID,
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureArm64),
).NodeSelectorRequirements(),
},
{
ID: amdAMIID,
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
).NodeSelectorRequirements(),
},
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
instanceTypes, err := cloudProvider.GetInstanceTypes(ctx, nodePool)
Expect(err).ToNot(HaveOccurred())
Expand Down Expand Up @@ -733,13 +782,25 @@ var _ = Describe("CloudProvider", func() {
})
It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() {
nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: amdAMIID}}
nodeClass.Status.AMIs = []v1beta1.AMI{
{
ID: amdAMIID,
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
).NodeSelectorRequirements(),
},
}
ExpectApplied(ctx, env.Client, nodeClass)
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(Equal(cloudprovider.AMIDrift))
})
Context("Static Drift Detection", func() {
BeforeEach(func() {
armRequirements := scheduling.NewRequirements()
armRequirements.Add(scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, "arm64"))
amdRequirements := scheduling.NewRequirements()
amdRequirements.Add(scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, "x86_64"))
nodeClass = &v1beta1.EC2NodeClass{
ObjectMeta: nodeClass.ObjectMeta,
Spec: v1beta1.EC2NodeClassSpec{
Expand Down Expand Up @@ -777,6 +838,18 @@ var _ = Describe("CloudProvider", func() {
},
},
},
Status: v1beta1.EC2NodeClassStatus{
AMIs: []v1beta1.AMI{
{
ID: armAMIID,
Requirements: armRequirements.NodeSelectorRequirements(),
},
{
ID: amdAMIID,
Requirements: amdRequirements.NodeSelectorRequirements(),
},
},
},
}
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})
nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})
Expand Down Expand Up @@ -1013,6 +1086,9 @@ var _ = Describe("CloudProvider", func() {
},
},
},
Status: v1beta1.EC2NodeClassStatus{
AMIs: nodeClass.Status.AMIs,
},
})
nodePool2 := coretest.NodePool(corev1beta1.NodePool{
Spec: corev1beta1.NodePoolSpec{
Expand Down
21 changes: 13 additions & 8 deletions pkg/providers/amifamily/ami.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"fmt"
"sort"
"strings"
"sync"
"time"

"github.com/aws/aws-sdk-go/aws"
Expand All @@ -45,6 +46,7 @@ type Provider interface {
}

type DefaultProvider struct {
sync.RWMutex
cache *cache.Cache
ssm ssmiface.SSMAPI
ec2api ec2iface.EC2API
Expand Down Expand Up @@ -79,25 +81,25 @@ func (a AMIs) Sort() {
})
}

func (a AMIs) String() string {
func String(amis []v1beta1.AMI) string {
var sb strings.Builder
ids := lo.Map(a, func(a AMI, _ int) string { return a.AmiID })
if len(a) > 25 {
ids := lo.Map(amis, func(a v1beta1.AMI, _ int) string { return a.ID })
if len(amis) > 25 {
sb.WriteString(strings.Join(ids[:25], ", "))
sb.WriteString(fmt.Sprintf(" and %d other(s)", len(a)-25))
sb.WriteString(fmt.Sprintf(" and %d other(s)", len(amis)-25))
} else {
sb.WriteString(strings.Join(ids, ", "))
}
return sb.String()
}

// MapToInstanceTypes returns a map of AMIIDs that are the most recent on creationDate to compatible instancetypes
func (a AMIs) MapToInstanceTypes(instanceTypes []*cloudprovider.InstanceType) map[string][]*cloudprovider.InstanceType {
func MapToInstanceTypes(instanceTypes []*cloudprovider.InstanceType, amis []v1beta1.AMI) map[string][]*cloudprovider.InstanceType {
amiIDs := map[string][]*cloudprovider.InstanceType{}
for _, instanceType := range instanceTypes {
for _, ami := range a {
if err := instanceType.Requirements.Compatible(ami.Requirements, scheduling.AllowUndefinedWellKnownLabels); err == nil {
amiIDs[ami.AmiID] = append(amiIDs[ami.AmiID], instanceType)
for _, ami := range amis {
if err := instanceType.Requirements.Compatible(scheduling.NewNodeSelectorRequirementsWithMinValues(ami.Requirements...), scheduling.AllowUndefinedWellKnownLabels); err == nil {
amiIDs[ami.ID] = append(amiIDs[ami.ID], instanceType)
break
}
}
Expand All @@ -117,6 +119,9 @@ func NewDefaultProvider(versionProvider version.Provider, ssm ssmiface.SSMAPI, e

// Get Returning a list of AMIs with its associated requirements
func (p *DefaultProvider) Get(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, options *Options) (AMIs, error) {
p.Lock()
defer p.Unlock()

var err error
var amis AMIs
if len(nodeClass.Spec.AMISelectorTerms) == 0 {
Expand Down
10 changes: 3 additions & 7 deletions pkg/providers/amifamily/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,12 @@ func NewResolver(amiProvider Provider) *Resolver {
// Multiple ResolvedTemplates are returned based on the instanceTypes passed in to support special AMIs for certain instance types like GPUs.
func (r Resolver) Resolve(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, capacityType string, options *Options) ([]*LaunchTemplate, error) {
amiFamily := GetAMIFamily(nodeClass.Spec.AMIFamily, options)
amis, err := r.amiProvider.Get(ctx, nodeClass, options)
if err != nil {
return nil, err
}
if len(amis) == 0 {
if len(nodeClass.Status.AMIs) == 0 {
return nil, fmt.Errorf("no amis exist given constraints")
}
mappedAMIs := amis.MapToInstanceTypes(instanceTypes)
mappedAMIs := MapToInstanceTypes(instanceTypes, nodeClass.Status.AMIs)
if len(mappedAMIs) == 0 {
return nil, fmt.Errorf("no instance types satisfy requirements of amis %v", amis)
return nil, fmt.Errorf("no instance types satisfy requirements of amis %v", String(nodeClass.Status.AMIs))
}
var resolvedTemplates []*LaunchTemplate
for amiID, instanceTypes := range mappedAMIs {
Expand Down
34 changes: 34 additions & 0 deletions pkg/providers/instance/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/samber/lo"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/record"
Expand All @@ -31,6 +32,7 @@ import (
"sigs.k8s.io/karpenter/pkg/events"
coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
"sigs.k8s.io/karpenter/pkg/operator/scheme"
"sigs.k8s.io/karpenter/pkg/scheduling"
coretest "sigs.k8s.io/karpenter/pkg/test"

"github.com/aws/karpenter-provider-aws/pkg/apis"
Expand Down Expand Up @@ -106,6 +108,38 @@ var _ = Describe("InstanceProvider", func() {
},
},
})
nodeClass.Status.AMIs = []v1beta1.AMI{
{
ID: "ami-test1",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1beta1.LabelInstanceGPUCount, v1.NodeSelectorOpDoesNotExist),
scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpDoesNotExist),
).NodeSelectorRequirements(),
},
{
ID: "ami-test2",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1beta1.LabelInstanceGPUCount, v1.NodeSelectorOpExists),
).NodeSelectorRequirements(),
},
{
ID: "ami-test3",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpExists),
).NodeSelectorRequirements(),
},
{
ID: "ami-test4",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureArm64),
scheduling.NewRequirement(v1beta1.LabelInstanceGPUCount, v1.NodeSelectorOpDoesNotExist),
scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpDoesNotExist),
).NodeSelectorRequirements(),
},
}
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed())
})
Expand Down
42 changes: 42 additions & 0 deletions pkg/providers/instancetype/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,48 @@ var _ = Describe("InstanceTypeProvider", func() {
},
},
})
nodeClass.Status.AMIs = []v1beta1.AMI{
{
ID: "ami-test1",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1beta1.LabelInstanceGPUCount, v1.NodeSelectorOpDoesNotExist),
scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpDoesNotExist),
).NodeSelectorRequirements(),
},
{
ID: "ami-test2",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1beta1.LabelInstanceGPUCount, v1.NodeSelectorOpExists),
).NodeSelectorRequirements(),
},
{
ID: "ami-test3",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpExists),
).NodeSelectorRequirements(),
},
{
ID: "ami-test4",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureArm64),
scheduling.NewRequirement(v1beta1.LabelInstanceGPUCount, v1.NodeSelectorOpDoesNotExist),
scheduling.NewRequirement(v1beta1.LabelInstanceAcceleratorCount, v1.NodeSelectorOpDoesNotExist),
).NodeSelectorRequirements(),
},
}
windowsNodeClass.Status.AMIs = []v1beta1.AMI{
{
ID: "ami-window-test1",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureAmd64),
scheduling.NewRequirement(v1.LabelOSStable, v1.NodeSelectorOpIn, string(v1.Windows)),
scheduling.NewRequirement(v1.LabelWindowsBuild, v1.NodeSelectorOpIn, v1beta1.Windows2022Build),
).NodeSelectorRequirements(),
},
}
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed())
})
Expand Down
Loading

0 comments on commit 5b2a3dc

Please sign in to comment.