Skip to content

Commit

Permalink
Use Security Groups status for node launch
Browse files Browse the repository at this point in the history
  • Loading branch information
engedaam committed Apr 25, 2024
1 parent b7745bf commit 1cab944
Show file tree
Hide file tree
Showing 6 changed files with 228 additions and 26 deletions.
12 changes: 4 additions & 8 deletions pkg/cloudprovider/drift.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim *corev
if err != nil {
return "", fmt.Errorf("calculating ami drift, %w", err)
}
securitygroupDrifted, err := c.areSecurityGroupsDrifted(ctx, instance, nodeClass)
securitygroupDrifted, err := c.areSecurityGroupsDrifted(instance, nodeClass)
if err != nil {
return "", fmt.Errorf("calculating securitygroup drift, %w", err)
}
Expand Down Expand Up @@ -118,14 +118,10 @@ func (c *CloudProvider) isSubnetDrifted(ctx context.Context, instance *instance.

// Checks if the security groups are drifted, by comparing the security groups returned from the SecurityGroupProvider
// to the ec2 instance security groups
func (c *CloudProvider) areSecurityGroupsDrifted(ctx context.Context, ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) {
securitygroup, err := c.securityGroupProvider.List(ctx, nodeClass)
if err != nil {
return "", err
}
securityGroupIds := sets.New(lo.Map(securitygroup, func(sg *ec2.SecurityGroup, _ int) string { return aws.StringValue(sg.GroupId) })...)
func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) {
securityGroupIds := sets.New(lo.Map(nodeClass.Status.SecurityGroups, func(sg v1beta1.SecurityGroup, _ int) string { return sg.ID })...)
if len(securityGroupIds) == 0 {
return "", fmt.Errorf("no security groups are discovered")
return "", fmt.Errorf("no security groups are present in the status")
}

if !securityGroupIds.Equal(sets.New(ec2Instance.SecurityGroupIDs...)) {
Expand Down
60 changes: 56 additions & 4 deletions pkg/cloudprovider/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,19 @@ import (
"github.com/aws/karpenter-provider-aws/pkg/apis"
"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"
"github.com/aws/karpenter-provider-aws/pkg/cloudprovider"
"github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/status"
"github.com/aws/karpenter-provider-aws/pkg/fake"
"github.com/aws/karpenter-provider-aws/pkg/operator/options"
"github.com/aws/karpenter-provider-aws/pkg/test"

"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
corecloudproivder "sigs.k8s.io/karpenter/pkg/cloudprovider"
"sigs.k8s.io/karpenter/pkg/controllers/provisioning"
"sigs.k8s.io/karpenter/pkg/controllers/state"
"sigs.k8s.io/karpenter/pkg/events"
"sigs.k8s.io/karpenter/pkg/operator/controller"
coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
"sigs.k8s.io/karpenter/pkg/operator/scheme"
coretest "sigs.k8s.io/karpenter/pkg/test"
Expand All @@ -67,6 +71,7 @@ var cloudProvider *cloudprovider.CloudProvider
var cluster *state.Cluster
var fakeClock *clock.FakeClock
var recorder events.Recorder
var statusController controller.Controller

func TestAWS(t *testing.T) {
ctx = TestContextWithLogger(t)
Expand All @@ -86,6 +91,14 @@ var _ = BeforeSuite(func() {
env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider)
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster)
statusController = status.NewController(
env.Client,
awsEnv.SubnetProvider,
awsEnv.SecurityGroupProvider,
awsEnv.AMIProvider,
awsEnv.InstanceProfileProvider,
awsEnv.LaunchTemplateProvider,
)
})

var _ = AfterSuite(func() {
Expand Down Expand Up @@ -153,19 +166,22 @@ var _ = Describe("CloudProvider", func() {
},
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim)
Expect(corecloudproivder.IsInsufficientCapacityError(err)).To(BeTrue())
Expect(cloudProviderNodeClaim).To(BeNil())
})
It("should set ImageID in the status field of the nodeClaim", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim)
Expect(err).To(BeNil())
Expect(cloudProviderNodeClaim).ToNot(BeNil())
Expect(cloudProviderNodeClaim.Status.ImageID).ToNot(BeEmpty())
})
It("should return NodeClass Hash on the nodeClaim", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim)
Expect(err).To(BeNil())
Expect(cloudProviderNodeClaim).ToNot(BeNil())
Expand All @@ -174,6 +190,7 @@ var _ = Describe("CloudProvider", func() {
})
It("should return NodeClass Hash Version on the nodeClaim", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim)
Expect(err).To(BeNil())
Expect(cloudProviderNodeClaim).ToNot(BeNil())
Expand All @@ -186,6 +203,7 @@ var _ = Describe("CloudProvider", func() {
It("should set context on the CreateFleet request if specified on the NodePool", func() {
nodeClass.Spec.Context = aws.String(contextID)
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
pod := coretest.UnschedulablePod()
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
ExpectScheduled(ctx, env.Client, pod)
Expand All @@ -195,6 +213,7 @@ var _ = Describe("CloudProvider", func() {
})
It("should default to no EC2 Context", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
pod := coretest.UnschedulablePod()
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
ExpectScheduled(ctx, env.Client, pod)
Expand Down Expand Up @@ -268,6 +287,7 @@ var _ = Describe("CloudProvider", func() {
})

ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))

// 2 pods are created with resources such that both fit together only in one of the 2 InstanceTypes created above.
pod1 := coretest.UnschedulablePod(
Expand Down Expand Up @@ -364,6 +384,7 @@ var _ = Describe("CloudProvider", func() {
})

ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))

// 2 pods are created with resources such that both fit together only in one of the 2 InstanceTypes created above.
pod1 := coretest.UnschedulablePod(
Expand Down Expand Up @@ -470,6 +491,7 @@ var _ = Describe("CloudProvider", func() {
})

ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
pod1 := coretest.UnschedulablePod(
coretest.PodOptions{
ResourceRequirements: v1.ResourceRequirements{Requests: v1.ResourceList{
Expand Down Expand Up @@ -565,8 +587,10 @@ var _ = Describe("CloudProvider", func() {
awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{
Subnets: []*ec2.Subnet{
{
SubnetId: aws.String(validSubnet1),
AvailabilityZone: aws.String("zone-1"),
SubnetId: aws.String(validSubnet1),
AvailabilityZone: aws.String("zone-1"),
AvailableIpAddressCount: aws.Int64(100),
MapPublicIpOnLaunch: aws.Bool(false),
Tags: []*ec2.Tag{
{
Key: aws.String("sn-key-1"),
Expand All @@ -575,8 +599,10 @@ var _ = Describe("CloudProvider", func() {
},
},
{
SubnetId: aws.String(validSubnet2),
AvailabilityZone: aws.String("zone-2"),
SubnetId: aws.String(validSubnet2),
AvailabilityZone: aws.String("zone-2"),
AvailableIpAddressCount: aws.Int64(100),
MapPublicIpOnLaunch: aws.Bool(false),
Tags: []*ec2.Tag{
{
Key: aws.String("sn-key-2"),
Expand All @@ -587,6 +613,7 @@ var _ = Describe("CloudProvider", func() {
},
})
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
instanceTypes, err := cloudProvider.GetInstanceTypes(ctx, nodePool)
Expect(err).ToNot(HaveOccurred())
selectedInstanceType = instanceTypes[0]
Expand Down Expand Up @@ -621,6 +648,8 @@ var _ = Describe("CloudProvider", func() {
nodeClaim.Labels = lo.Assign(nodeClaim.Labels, map[string]string{v1.LabelInstanceTypeStable: selectedInstanceType.Name})
})
It("should not fail if NodeClass does not exist", func() {
controllerutil.RemoveFinalizer(nodeClass, v1beta1.TerminationFinalizer)
ExpectApplied(ctx, env.Client, nodeClass)
ExpectDeleted(ctx, env.Client, nodeClass)
drifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expand Down Expand Up @@ -674,6 +703,8 @@ var _ = Describe("CloudProvider", func() {
awsEnv.EC2API.DescribeSecurityGroupsOutput.Set(&ec2.DescribeSecurityGroupsOutput{SecurityGroups: []*ec2.SecurityGroup{}})
// Instance is a reference to what we return in the GetInstances call
instance.SecurityGroups = []*ec2.GroupIdentifier{{GroupId: aws.String(fake.SecurityGroupID())}}
awsEnv.SecurityGroupCache.Flush()
ExpectReconcileFailed(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
_, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).To(HaveOccurred())
})
Expand Down Expand Up @@ -704,6 +735,8 @@ var _ = Describe("CloudProvider", func() {
},
},
})
awsEnv.SecurityGroupCache.Flush()
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift))
Expand Down Expand Up @@ -734,6 +767,7 @@ var _ = Describe("CloudProvider", func() {
It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() {
nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: amdAMIID}}
ExpectApplied(ctx, env.Client, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(Equal(cloudprovider.AMIDrift))
Expand Down Expand Up @@ -784,6 +818,7 @@ var _ = Describe("CloudProvider", func() {
DescribeTable("should return drifted if a statically drifted EC2NodeClass.Spec field is updated",
func(changes v1beta1.EC2NodeClass) {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -792,6 +827,7 @@ var _ = Describe("CloudProvider", func() {
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})

ExpectApplied(ctx, env.Client, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err = cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(Equal(cloudprovider.NodeClassDrift))
Expand Down Expand Up @@ -831,6 +867,7 @@ var _ = Describe("CloudProvider", func() {
DescribeTable("should not return drifted if dynamic fields are updated",
func(changes v1beta1.EC2NodeClass) {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -839,6 +876,7 @@ var _ = Describe("CloudProvider", func() {
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})

ExpectApplied(ctx, env.Client, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err = cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -855,6 +893,7 @@ var _ = Describe("CloudProvider", func() {
"Test Key": "Test Value",
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -869,6 +908,7 @@ var _ = Describe("CloudProvider", func() {
v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2",
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -886,6 +926,7 @@ var _ = Describe("CloudProvider", func() {
"Test Key": "Test Value",
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -903,6 +944,7 @@ var _ = Describe("CloudProvider", func() {
"Test Key": "Test Value",
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -914,6 +956,7 @@ var _ = Describe("CloudProvider", func() {
// hard coded fixture data (ex. what the aws api will return) is maintained in fake/ec2api.go
It("should default to the cluster's subnets", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
pod := coretest.UnschedulablePod(
coretest.PodOptions{NodeSelector: map[string]string{v1.LabelArchStable: corev1beta1.ArchitectureAmd64}})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
Expand Down Expand Up @@ -945,6 +988,8 @@ var _ = Describe("CloudProvider", func() {
Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}},
}})
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))

pod := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{v1.LabelTopologyZone: "test-zone-1a"}})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
ExpectScheduled(ctx, env.Client, pod)
Expand All @@ -960,6 +1005,7 @@ var _ = Describe("CloudProvider", func() {
}})
nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{MaxPods: aws.Int32(1)}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
pod1 := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{v1.LabelTopologyZone: "test-zone-1a"}})
pod2 := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{v1.LabelTopologyZone: "test-zone-1a"}})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod1, pod2)
Expand Down Expand Up @@ -994,6 +1040,7 @@ var _ = Describe("CloudProvider", func() {
}})
nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{{Tags: map[string]string{"Name": "test-subnet-1"}}}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
podSubnet1 := coretest.UnschedulablePod()
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, podSubnet1)
ExpectScheduled(ctx, env.Client, podSubnet1)
Expand Down Expand Up @@ -1026,6 +1073,7 @@ var _ = Describe("CloudProvider", func() {
},
})
ExpectApplied(ctx, env.Client, nodePool2, nodeClass2)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass2))
podSubnet2 := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{corev1beta1.NodePoolLabelKey: nodePool2.Name}})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, podSubnet2)
ExpectScheduled(ctx, env.Client, podSubnet2)
Expand Down Expand Up @@ -1067,6 +1115,8 @@ var _ = Describe("CloudProvider", func() {
},
})
ExpectApplied(ctx, env.Client, nodePool, nodePool2, nodeClass, misconfiguredNodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
ExpectReconcileFailed(ctx, statusController, client.ObjectKeyFromObject(misconfiguredNodeClass))
pod := coretest.UnschedulablePod()
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
ExpectScheduled(ctx, env.Client, pod)
Expand All @@ -1085,6 +1135,7 @@ var _ = Describe("CloudProvider", func() {
}
nodeClaim.Spec.Resources.Requests = v1.ResourceList{v1beta1.ResourceEFA: resource.MustParse("1")}
ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim)
Expect(err).To(BeNil())
Expect(lo.Keys(cloudProviderNodeClaim.Status.Allocatable)).To(ContainElement(v1beta1.ResourceEFA))
Expand All @@ -1100,6 +1151,7 @@ var _ = Describe("CloudProvider", func() {
},
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim)
Expect(err).To(BeNil())
Expect(lo.Keys(cloudProviderNodeClaim.Status.Allocatable)).ToNot(ContainElement(v1beta1.ResourceEFA))
Expand Down
Loading

0 comments on commit 1cab944

Please sign in to comment.