Skip to content

Commit

Permalink
chore: Wait for instance termination before deleting nodeclaim
Browse files Browse the repository at this point in the history
  • Loading branch information
jigisha620 committed Apr 22, 2024
1 parent 79fc601 commit 9ba9f74
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 29 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ e2etests: ## Run the e2e suite against your local cluster
go test \
-p 1 \
-count 1 \
-timeout 3h \
-timeout 5h \
-v \
./suites/$(shell echo $(TEST_SUITE) | tr A-Z a-z)/... \
--ginkgo.focus="${FOCUS}" \
Expand Down
6 changes: 4 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ require (
k8s.io/utils v0.0.0-20240102154912-e7106e64919e
knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd
sigs.k8s.io/controller-runtime v0.17.3
sigs.k8s.io/karpenter v0.36.0
sigs.k8s.io/karpenter v0.36.1-0.20240416012831-c29c8ff7d520
sigs.k8s.io/yaml v1.4.0
)

Expand Down Expand Up @@ -79,7 +79,7 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_model v0.6.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.48.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/prometheus/statsd_exporter v0.24.0 // indirect
Expand Down Expand Up @@ -115,3 +115,5 @@ require (
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
)

replace sigs.k8s.io/karpenter => github.com/jigisha620/karpenter v0.0.0-20240419222931-5128a6f8810b
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jigisha620/karpenter v0.0.0-20240419222931-5128a6f8810b h1:4BeBE/jyi3D/lqFwThKmVGOKF8L23VjZZl112BP6lug=
github.com/jigisha620/karpenter v0.0.0-20240419222931-5128a6f8810b/go.mod h1:UDKl9xLrLxi9mRa7DfqOal42Y386AEy4CxRAzKbGy0k=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
Expand Down Expand Up @@ -299,8 +301,8 @@ github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos=
github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
Expand Down Expand Up @@ -759,8 +761,6 @@ sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9
sigs.k8s.io/controller-runtime v0.17.3/go.mod h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/karpenter v0.36.0 h1:i82fOsFWKwnChedKsj0Hep2yrTkAjCek/aZPSMX2dW8=
sigs.k8s.io/karpenter v0.36.0/go.mod h1:fieFojxOec/l0tDmFT7R+g/Y+SGQbL9VlcYO8xb3sLo=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
Expand Down
25 changes: 25 additions & 0 deletions test/pkg/environment/common/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,31 @@ func (env *Environment) ConsistentlyExpectDisruptionsWithNodeCount(disruptingNod
return lo.ToSlicePtr(nodes)
}

// EventuallyExpectDisruptionsWithNodeCount will eventually ensure that there are exactly disruptingNodes with totalNodes (including replacements and existing nodes)
func (env *Environment) EventuallyExpectDisruptionsWithNodeCount(disruptingNodes, totalNodes int) (taintedNodes []*v1.Node) {
GinkgoHelper()
nodes := []v1.Node{}
Eventually(func(g Gomega) {
// Ensure we don't change our NodeClaims
nodeClaimList := &corev1beta1.NodeClaimList{}
g.Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
g.Expect(nodeClaimList.Items).To(HaveLen(totalNodes))

nodeList := &v1.NodeList{}
g.Expect(env.Client.List(env, nodeList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
g.Expect(nodeList.Items).To(HaveLen(totalNodes))

nodes = lo.Filter(nodeList.Items, func(n v1.Node, _ int) bool {
_, ok := lo.Find(n.Spec.Taints, func(t v1.Taint) bool {
return corev1beta1.IsDisruptingTaint(t)
})
return ok
})
g.Expect(nodes).To(HaveLen(disruptingNodes))
}).Should(Succeed())
return lo.ToSlicePtr(nodes)
}

func (env *Environment) EventuallyExpectTaintedNodeCount(comparator string, count int) []*v1.Node {
GinkgoHelper()
By(fmt.Sprintf("waiting for tainted nodes to be %s to %d", comparator, count))
Expand Down
2 changes: 1 addition & 1 deletion test/suites/consolidation/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ var _ = Describe("Consolidation", func() {

// This check ensures that we are consolidating nodes at the same time
env.EventuallyExpectTaintedNodeCount("==", 2)
nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second)
nodes = env.EventuallyExpectDisruptionsWithNodeCount(2, 3)

for _, node := range nodes {
Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed())
Expand Down
9 changes: 1 addition & 8 deletions test/suites/drift/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ var _ = Describe("Drift", func() {
env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim)
})
Context("Failure", func() {
It("should not continue to drift if a node never registers", func() {
It("should not disrupt existing nodes if after drift new nodes fail to register", func() {
// launch a new nodeClaim
var numPods int32 = 2
dep := coretest.Deployment(coretest.DeploymentOptions{
Expand Down Expand Up @@ -888,13 +888,6 @@ var _ = Describe("Drift", func() {
// TODO: reduce timeouts when disruption waits are factored out
env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...)

// We give another 6 minutes here to handle the deletion at the 15m registration timeout
Eventually(func(g Gomega) {
nodeClaims := &corev1beta1.NodeClaimList{}
g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed())
g.Expect(nodeClaims.Items).To(HaveLen(int(numPods)))
}).WithTimeout(6 * time.Minute).Should(Succeed())

// Expect all the NodeClaims that existed on the initial provisioning loop are not removed
Consistently(func(g Gomega) {
nodeClaims := &corev1beta1.NodeClaimList{}
Expand Down
7 changes: 0 additions & 7 deletions test/suites/expiration/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -614,13 +614,6 @@ var _ = Describe("Expiration", func() {
// TODO: reduce timeouts when deprovisioning waits are factored out
env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...)

// The nodeclaims that never registers will be removed
Eventually(func(g Gomega) {
nodeClaims := &corev1beta1.NodeClaimList{}
g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed())
g.Expect(len(nodeClaims.Items)).To(BeNumerically("==", int(numPods)))
}).WithTimeout(6 * time.Minute).Should(Succeed())

// Expect all the NodeClaims that existed on the initial provisioning loop are not removed
Consistently(func(g Gomega) {
nodeClaims := &corev1beta1.NodeClaimList{}
Expand Down
54 changes: 48 additions & 6 deletions test/suites/nodeclaim/nodeclaim_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ import (
"os"
"time"

"github.com/samber/lo"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/karpenter/pkg/utils/resources"

"github.com/samber/lo"
v1 "k8s.io/api/core/v1"
corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
"sigs.k8s.io/karpenter/pkg/test"
"sigs.k8s.io/karpenter/pkg/utils/resources"

"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"

Expand Down Expand Up @@ -226,7 +226,7 @@ var _ = Describe("StandaloneNodeClaim", func() {
env.EventuallyExpectNotFound(nodeClaim, node)

Eventually(func(g Gomega) {
g.Expect(lo.FromPtr(env.GetInstanceByID(instanceID).State.Name)).To(Equal("shutting-down"))
g.Expect(lo.FromPtr(env.GetInstanceByID(instanceID).State.Name)).To(Equal("terminated"))
}, time.Second*10).Should(Succeed())
})
It("should delete a NodeClaim from the node termination finalizer", func() {
Expand Down Expand Up @@ -260,12 +260,12 @@ var _ = Describe("StandaloneNodeClaim", func() {
instanceID := env.ExpectParsedProviderID(node.Spec.ProviderID)
env.GetInstance(node.Name)

// Delete the node and expect both the node and nodeClaim to be gone as well as the instance to be shutting-down
// Delete the node and expect both the node and nodeClaim to be gone as well as the instance to be terminated
env.ExpectDeleted(node)
env.EventuallyExpectNotFound(nodeClaim, node)

Eventually(func(g Gomega) {
g.Expect(lo.FromPtr(env.GetInstanceByID(instanceID).State.Name)).To(Equal("shutting-down"))
g.Expect(lo.FromPtr(env.GetInstanceByID(instanceID).State.Name)).To(Equal("terminated"))
}, time.Second*10).Should(Succeed())
})
It("should create a NodeClaim with custom labels passed through the userData", func() {
Expand Down Expand Up @@ -377,4 +377,46 @@ var _ = Describe("StandaloneNodeClaim", func() {
// Expect that the nodeClaim is eventually de-provisioned due to the registration timeout
env.EventuallyExpectNotFoundAssertion(nodeClaim).WithTimeout(time.Minute * 20).Should(Succeed())
})
It("should wait for instance termination before removing finalizer from nodeClaim", func() {
nodeClaim := test.NodeClaim(corev1beta1.NodeClaim{
Spec: corev1beta1.NodeClaimSpec{
Requirements: []corev1beta1.NodeSelectorRequirementWithMinValues{
{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
Key: v1beta1.LabelInstanceCategory,
Operator: v1.NodeSelectorOpIn,
Values: []string{"c"},
},
},
{
NodeSelectorRequirement: v1.NodeSelectorRequirement{
Key: corev1beta1.CapacityTypeLabelKey,
Operator: v1.NodeSelectorOpIn,
Values: []string{corev1beta1.CapacityTypeOnDemand},
},
},
},
NodeClassRef: &corev1beta1.NodeClassReference{
Name: nodeClass.Name,
},
},
})
env.ExpectCreated(nodeClass, nodeClaim)
node := env.EventuallyExpectInitializedNodeCount("==", 1)[0]
nodeClaim = env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0]

instanceID := env.ExpectParsedProviderID(node.Spec.ProviderID)
env.GetInstance(node.Name)

env.ExpectDeleted(nodeClaim)
Eventually(func(g Gomega) {
g.Expect(lo.FromPtr(env.GetInstanceByID(instanceID).State.Name)).To(Equal("shutting-down"))
}, time.Second*10).Should(Succeed())

Expect(nodeClaim.Finalizers).Should(ContainElement(corev1beta1.TerminationFinalizer))
env.EventuallyExpectNotFound(nodeClaim, node)
Eventually(func(g Gomega) {
g.Expect(lo.FromPtr(env.GetInstanceByID(instanceID).State.Name)).To(Equal("terminated"))
}, time.Second*10).Should(Succeed())
})
})

0 comments on commit 9ba9f74

Please sign in to comment.