diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 894b64eac927..1264109d4fb3 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -227,8 +227,8 @@ func (c *CloudProvider) GetSupportedNodeClasses() []status.Object { return []status.Object{&v1.EC2NodeClass{}} } -func (c *CloudProvider) RepairPolicy() []cloudprovider.RepairStatements { - return []cloudprovider.RepairStatements{ +func (c *CloudProvider) RepairPolicy() []cloudprovider.RepairStatement { + return []cloudprovider.RepairStatement{ // Supported Kubelet fields { Type: "Ready", @@ -237,12 +237,13 @@ func (c *CloudProvider) RepairPolicy() []cloudprovider.RepairStatements { }, { Type: "DiskPressure", - Status: corev1.ConditionFalse, + // The kubelet reports pressure conditions as True while the node is under pressure, so the policy matches ConditionTrue here and for MemoryPressure below. + Status: corev1.ConditionTrue, TolerationDuration: 30 * time.Minute, }, { Type: "MemoryPressure", - Status: corev1.ConditionFalse, + Status: corev1.ConditionTrue, TolerationDuration: 30 * time.Minute, }, } diff --git a/pkg/fake/cloudprovider.go b/pkg/fake/cloudprovider.go index f6e3c4c83475..4ba71652ce49 100644 --- a/pkg/fake/cloudprovider.go +++ b/pkg/fake/cloudprovider.go @@ -22,7 +22,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" - "sigs.k8s.io/karpenter/pkg/cloudprovider" corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/test" @@ -91,6 +90,7 @@ func (c *CloudProvider) GetSupportedNodeClasses() []status.Object { return []status.Object{&v1.EC2NodeClass{}} } -func (c *CloudProvider) RepairPolicy() []cloudprovider.RepairStatements { - return []cloudprovider.RepairStatements{} +// RepairPolicy returns an empty set of repair statements for the fake cloud provider. +func (c *CloudProvider) RepairPolicy() []corecloudprovider.RepairStatement { + return []corecloudprovider.RepairStatement{} } diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go index 4112b24c24dd..1e8f2a4dab83 100644 --- a/test/pkg/environment/common/expectations.go +++ 
b/test/pkg/environment/common/expectations.go @@ -89,6 +89,27 @@ func (env *Environment) ExpectUpdated(objects ...client.Object) { } } +// ExpectStatusUpdated updates the status subresource of the objects in the cluster to match the inputs. +// WARNING: This ignores the resource version check, which can result in +// overwriting changes made by other controllers in the cluster. +// Each attempt re-reads the object, copies the live resource version onto the +// input, and retries the status update until it succeeds or 10 seconds elapse. +// Grab the object before making the updates to reduce the chance of this race. +func (env *Environment) ExpectStatusUpdated(objects ...client.Object) { + GinkgoHelper() + for _, o := range objects { + Eventually(func(g Gomega) { + current := o.DeepCopyObject().(client.Object) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(current), current)).To(Succeed()) + if current.GetResourceVersion() != o.GetResourceVersion() { + log.FromContext(env).Info(fmt.Sprintf("detected an update to an object (%s) with an outdated resource version, did you get the latest version of the object before patching?", lo.Must(apiutil.GVKForObject(o, env.Client.Scheme())))) + } + o.SetResourceVersion(current.GetResourceVersion()) + g.Expect(env.Client.Status().Update(env.Context, o)).To(Succeed()) + }).WithTimeout(time.Second * 10).Should(Succeed()) + } +} + // ExpectCreatedOrUpdated can update objects in the cluster to match the inputs. // WARNING: ExpectUpdated ignores the resource version check, which can result in // overwriting changes made by other controllers in the cluster. diff --git a/test/suites/integration/repair_policy_test.go b/test/suites/integration/repair_policy_test.go new file mode 100644 index 000000000000..47478a406429 --- /dev/null +++ b/test/suites/integration/repair_policy_test.go @@ -0,0 +1,85 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package integration_test
+
+import (
+	"time"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	karpenterv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
+	coretest "sigs.k8s.io/karpenter/pkg/test"
+
+	. "github.com/onsi/ginkgo/v2"
+	"github.com/samber/lo"
+)
+
+var _ = Describe("Repair Policy", func() {
+	var selector labels.Selector
+	var dep *appsv1.Deployment
+	var numPods int
+
+	BeforeEach(func() {
+		numPods = 1
+		// Add pods with a do-not-disrupt annotation so that we can check node metadata before we disrupt
+		dep = coretest.Deployment(coretest.DeploymentOptions{
+			Replicas: int32(numPods),
+			PodOptions: coretest.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"app": "my-app",
+					},
+					Annotations: map[string]string{
+						karpenterv1.DoNotDisruptAnnotationKey: "true",
+					},
+				},
+				TerminationGracePeriodSeconds: lo.ToPtr[int64](0),
+			},
+		})
+		selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels)
+	})
+	// Not focused (no F prefix): a focused table would make Ginkgo skip every other spec in the suite.
+	DescribeTable("Conditions", func(unhealthyCondition corev1.NodeCondition) {
+		env.ExpectCreated(nodeClass, nodePool, dep)
+		pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0]
+		node := env.ExpectCreatedNodeCount("==", 1)[0]
+		env.EventuallyExpectInitializedNodeCount("==", 1)
+		// Replace any existing condition of this type; appending alone would leave two entries with the same type.
+		node.Status.Conditions = append(lo.Reject(node.Status.Conditions, func(c corev1.NodeCondition, _ int) bool {
+			return c.Type == unhealthyCondition.Type
+		}), unhealthyCondition)
+		env.ExpectStatusUpdated(node)
+		env.EventuallyExpectNotFound(pod, node)
+		env.EventuallyExpectHealthyPodCount(selector, numPods)
+	},
+		Entry("Kubelet Readiness", corev1.NodeCondition{
+			Type:               corev1.NodeReady,
+			Status:             corev1.ConditionFalse,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-30 * time.Minute)},
+		}),
+		Entry("Kubelet DiskPressure", corev1.NodeCondition{
+			Type:               corev1.NodeDiskPressure,
+			Status:             corev1.ConditionTrue,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-30 * time.Minute)},
+		}),
+		Entry("Kubelet MemoryPressure", corev1.NodeCondition{
+			Type:               corev1.NodeMemoryPressure,
+			Status:             corev1.ConditionTrue,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-30 * time.Minute)},
+		}),
+	)
+})