Skip to content

Commit

Permalink
Drop replacement testing and ensure parallelism for tainting
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed Jan 25, 2024
1 parent 4b6a4b4 commit a8bc7dc
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 171 deletions.
15 changes: 14 additions & 1 deletion test/pkg/environment/common/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ func (env *Environment) ExpectNodeCount(comparator string, count int) {

func (env *Environment) ExpectNodeClaimCount(comparator string, count int) {
GinkgoHelper()

nodeClaimList := &corev1beta1.NodeClaimList{}
Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
Expect(len(nodeClaimList.Items)).To(BeNumerically(comparator, count))
Expand Down Expand Up @@ -527,6 +527,19 @@ func (env *Environment) ConsistentlyExpectNoDisruptions(nodeCount int, duration
}, duration).Should(Succeed())
}

func (env *Environment) ConsistentlyExpectTaintedNodeCount(comparator string, count int, duration string) []*v1.Node {
GinkgoHelper()

By(fmt.Sprintf("checking for tainted nodes to be %s to %d for %s", comparator, count, duration))
nodeList := &v1.NodeList{}
Consistently(func(g Gomega) {
g.Expect(env.Client.List(env, nodeList, client.MatchingFields{"spec.taints[*].karpenter.sh/disruption": "disrupting"})).To(Succeed())
g.Expect(len(nodeList.Items)).To(BeNumerically(comparator, count),
fmt.Sprintf("expected %d tainted nodes, had %d (%v)", count, len(nodeList.Items), NodeNames(lo.ToSlicePtr(nodeList.Items))))
}, duration).Should(Succeed())
return lo.ToSlicePtr(nodeList.Items)
}

func (env *Environment) EventuallyExpectTaintedNodeCount(comparator string, count int) []*v1.Node {
GinkgoHelper()
By(fmt.Sprintf("waiting for tainted nodes to be %s to %d", comparator, count))
Expand Down
9 changes: 7 additions & 2 deletions test/suites/consolidation/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,9 @@ var _ = Describe("Consolidation", func() {
// Update the deployment to only contain 1 replica.
env.ExpectUpdated(dep)

// This check ensures that we are consolidating nodes at the same time
// Ensure that we get two nodes tainted, and they have overlap during the drift
nodes = env.EventuallyExpectTaintedNodeCount("==", 2)
env.ConsistentlyExpectTaintedNodeCount("==", 2, "5s")

// Remove the finalizer from each node so that we can terminate
for _, node := range nodes {
Expand All @@ -144,7 +145,7 @@ var _ = Describe("Consolidation", func() {
env.EventuallyExpectNotFound(nodes[0], nodes[1])

// Expect there to only be one node remaining for the last replica
env.ExpectNodeClaimCount("==", 1)
env.ExpectNodeCount("==", 1)
})
It("should respect budgets for non-empty delete consolidation", func() {
// This test will hold consolidation until we are ready to execute it
Expand Down Expand Up @@ -203,11 +204,15 @@ var _ = Describe("Consolidation", func() {
nodePool.Spec.Disruption.ConsolidateAfter = nil
env.ExpectUpdated(nodePool)

// Ensure that we get two nodes tainted, and they have overlap during the drift
nodes = env.EventuallyExpectTaintedNodeCount("==", 2)
env.ConsistentlyExpectTaintedNodeCount("==", 2, "5s")

for _, node := range nodes {
Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed())
}
env.EventuallyExpectNotFound(nodes[0], nodes[1])
env.ExpectNodeCount("==", 1)
})
It("should not allow consolidation if the budget is fully blocking", func() {
// We're going to define a budget that doesn't allow any consolidation to happen
Expand Down
86 changes: 5 additions & 81 deletions test/suites/drift/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,9 @@ var _ = Describe("Drift", func() {

env.EventuallyExpectDrifted(nodeClaims...)

// Ensure that we get two nodes tainted, and they have overlap during the drift
nodes = env.EventuallyExpectTaintedNodeCount("==", 2)
env.ConsistentlyExpectTaintedNodeCount("==", 2, "5s")

// Remove the finalizer from each node so that we can terminate
for _, node := range nodes {
Expand Down Expand Up @@ -246,7 +248,10 @@ var _ = Describe("Drift", func() {
Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodes[0]), nodes[0])).To(Succeed())
nodes[0].Spec.Unschedulable = false
env.ExpectUpdated(nodes[0])

// Ensure that we get two nodes tainted, and they have overlap during the drift
nodes = env.EventuallyExpectTaintedNodeCount("==", 2)
env.ConsistentlyExpectTaintedNodeCount("==", 2, "5s")

By("removing the finalizer from the nodes")
Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed())
Expand All @@ -256,87 +261,6 @@ var _ = Describe("Drift", func() {
// the node should be gone
env.EventuallyExpectNotFound(nodes[0], nodes[1])
})
It("should respect budgets for non-empty replace drift", func() {
nodePool = coretest.ReplaceRequirements(nodePool,
v1.NodeSelectorRequirement{
Key: v1beta1.LabelInstanceSize,
Operator: v1.NodeSelectorOpIn,
Values: []string{"2xlarge"},
},
)
// We're expecting to create 3 nodes, so we'll expect to see at most 2 nodes deleting at one time.
nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
Nodes: "50%",
}}
var numPods int32 = 3
dep = coretest.Deployment(coretest.DeploymentOptions{
Replicas: numPods,
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
corev1beta1.DoNotDisruptAnnotationKey: "true",
},
Labels: map[string]string{"app": "large-app"},
},
// Each 2xlarge has 8 cpu, so each node should fit no more than 3 pods.
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5"),
},
},
},
})
selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels)
env.ExpectCreated(nodeClass, nodePool, dep)

nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3)
nodes := env.EventuallyExpectCreatedNodeCount("==", 3)
env.EventuallyExpectHealthyPodCount(selector, int(numPods))
env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after drift

By("cordoning and adding finalizer to the nodes")
// Add a finalizer to each node so that we can stop termination disruptions
for _, node := range nodes {
Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed())
node.Finalizers = append(node.Finalizers, common.TestingFinalizer)
// Set nodes as unschedulable so that pod nomination doesn't delay disruption for the second disruption action
env.ExpectUpdated(node)
}

By("drifting the nodes")
// Drift the nodeclaims
nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"}
env.ExpectUpdated(nodePool)

env.EventuallyExpectDrifted(nodeClaims...)

By("enabling disruption by removing the do not disrupt annotation")
pods := env.EventuallyExpectHealthyPodCount(selector, 3)
// Remove the do-not-disrupt annotation so that the nodes are now disruptable
for _, pod := range pods {
delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey)
env.ExpectUpdated(pod)
}

// Expect two nodes tainted, and 2 nodes created
tainted := env.EventuallyExpectTaintedNodeCount("==", 2)
env.EventuallyExpectCreatedNodeCount("==", 2)

Expect(env.ExpectTestingFinalizerRemoved(tainted[0])).To(Succeed())
Expect(env.ExpectTestingFinalizerRemoved(tainted[1])).To(Succeed())

env.EventuallyExpectNotFound(tainted[0], tainted[1])

// Expect one node tainted and a one more new node created.
tainted = env.EventuallyExpectTaintedNodeCount("==", 1)
env.EventuallyExpectCreatedNodeCount("==", 3)

Expect(env.ExpectTestingFinalizerRemoved(tainted[0])).To(Succeed())

// After the deletion timestamp is set and all pods are drained
// the node should be gone
env.EventuallyExpectNotFound(nodes[0], nodes[1], nodes[2])
})
It("should not allow drift if the budget is fully blocking", func() {
// We're going to define a budget that doesn't allow any drift to happen
nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
Expand Down
91 changes: 4 additions & 87 deletions test/suites/expiration/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ var _ = Describe("Expiration", func() {

// Expect that two nodes are tainted.
nodes = env.EventuallyExpectTaintedNodeCount("==", 2)
env.ConsistentlyExpectTaintedNodeCount("==", 2, "5s")

// Remove finalizers
for _, node := range nodes {
Expand Down Expand Up @@ -245,7 +246,10 @@ var _ = Describe("Expiration", func() {
Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodes[0]), nodes[0])).To(Succeed())
nodes[0].Spec.Unschedulable = false
env.ExpectUpdated(nodes[0])

// Ensure that we get two nodes tainted, and they have overlap during the expiration
nodes = env.EventuallyExpectTaintedNodeCount("==", 2)
env.ConsistentlyExpectTaintedNodeCount("==", 2, "5s")

By("removing the finalizer from the nodes")
Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed())
Expand All @@ -255,93 +259,6 @@ var _ = Describe("Expiration", func() {
// the node should be gone
env.EventuallyExpectNotFound(nodes[0], nodes[1])
})
It("should respect budgets for non-empty replace expiration", func() {
nodePool = coretest.ReplaceRequirements(nodePool,
v1.NodeSelectorRequirement{
Key: v1beta1.LabelInstanceSize,
Operator: v1.NodeSelectorOpIn,
Values: []string{"2xlarge"},
},
)
// We're expecting to create 3 nodes, so we'll expect to see at most 2 nodes deleting at one time.
nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
Nodes: "50%",
}}
nodePool.Spec.Disruption.ExpireAfter = corev1beta1.NillableDuration{}
numPods = 3
dep = coretest.Deployment(coretest.DeploymentOptions{
Replicas: int32(numPods),
PodOptions: coretest.PodOptions{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
corev1beta1.DoNotDisruptAnnotationKey: "true",
},
Labels: map[string]string{"app": "large-app"},
},
// Each 2xlarge has 8 cpu, so each node should fit no more than 3 pods.
ResourceRequirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5"),
},
},
},
})
selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels)
env.ExpectCreated(nodeClass, nodePool, dep)

nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3)
nodes := env.EventuallyExpectCreatedNodeCount("==", 3)
env.EventuallyExpectHealthyPodCount(selector, numPods)
env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after drift

By("cordoning and adding finalizer to the nodes")
// Add a finalizer to each node so that we can stop termination disruptions
for _, node := range nodes {
Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed())
node.Finalizers = append(node.Finalizers, common.TestingFinalizer)
// Set nodes as unschedulable so that pod nomination doesn't delay disruption for the second disruption action
env.ExpectUpdated(node)
}

By("expiring the nodes")
// Expire the nodeclaims
nodePool.Spec.Disruption.ExpireAfter = corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Second * 90)}
env.ExpectUpdated(nodePool)

env.EventuallyExpectExpired(nodeClaims...)

By("enabling disruption by removing the do not disrupt annotation")
pods := env.EventuallyExpectHealthyPodCount(selector, 3)
// Remove the do-not-disrupt annotation so that the nodes are now disruptable
for _, pod := range pods {
delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey)
env.ExpectUpdated(pod)
}

// Expect two nodes tainted and two nodes created
tainted := env.EventuallyExpectTaintedNodeCount("==", 2)
env.EventuallyExpectCreatedNodeCount("==", 2)

Expect(env.ExpectTestingFinalizerRemoved(tainted[0])).To(Succeed())
Expect(env.ExpectTestingFinalizerRemoved(tainted[1])).To(Succeed())

env.EventuallyExpectNotFound(tainted[0], tainted[1])

tainted = env.EventuallyExpectTaintedNodeCount("==", 1)
env.EventuallyExpectCreatedNodeCount("==", 3)

// Set the expireAfter to "Never" to make sure new node isn't deleted
// This is CRITICAL since it prevents nodes that are immediately spun up from immediately being expired and
// racing at the end of the E2E test, leaking node resources into subsequent tests
nodePool.Spec.Disruption.ExpireAfter.Duration = nil
env.ExpectUpdated(nodePool)

Expect(env.ExpectTestingFinalizerRemoved(tainted[0])).To(Succeed())

// After the deletion timestamp is set and all pods are drained
// the node should be gone
env.EventuallyExpectNotFound(nodes[0], nodes[1], nodes[2])
})
It("should not allow expiration if the budget is fully blocking", func() {
// We're going to define a budget that doesn't allow any expirations to happen
nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
Expand Down

0 comments on commit a8bc7dc

Please sign in to comment.