From 48c14fadd13a061687ba41c33f6e0cfff9d6fc54 Mon Sep 17 00:00:00 2001 From: Jigisha Patil <89548848+jigisha620@users.noreply.github.com> Date: Tue, 5 Nov 2024 09:43:29 -0800 Subject: [PATCH] Fix pod metrics that don't get deleted when the pod is deleted (#1796) --- pkg/controllers/metrics/pod/controller.go | 10 ++++++++ pkg/controllers/metrics/pod/suite_test.go | 29 +++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/pkg/controllers/metrics/pod/controller.go b/pkg/controllers/metrics/pod/controller.go index ceb644859..81a63ef58 100644 --- a/pkg/controllers/metrics/pod/controller.go +++ b/pkg/controllers/metrics/pod/controller.go @@ -147,7 +147,17 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco if err := c.kubeClient.Get(ctx, req.NamespacedName, pod); err != nil { if errors.IsNotFound(err) { c.pendingPods.Delete(req.NamespacedName.String()) + // Delete the unstarted metric since the pod is deleted + podUnstartedTimeSeconds.Delete(map[string]string{ + podName: req.Name, + podNamespace: req.Namespace, + }) c.unscheduledPods.Delete(req.NamespacedName.String()) + // Delete the unbound metric since the pod is deleted + podCurrentUnboundTimeSeconds.Delete(map[string]string{ + podName: req.Name, + podNamespace: req.Namespace, + }) c.metricStore.Delete(req.NamespacedName.String()) } return reconcile.Result{}, client.IgnoreNotFound(err) diff --git a/pkg/controllers/metrics/pod/suite_test.go b/pkg/controllers/metrics/pod/suite_test.go index 875a71263..8ffdbe5de 100644 --- a/pkg/controllers/metrics/pod/suite_test.go +++ b/pkg/controllers/metrics/pod/suite_test.go @@ -177,6 +177,35 @@ var _ = Describe("Pod Metrics", func() { _, found = FindMetricWithLabelValues("karpenter_pods_startup_duration_seconds", nil) Expect(found).To(BeTrue()) }) + It("should delete pod unstarted time and pod unbound duration metric on pod delete", func() { + p := test.Pod() + p.Status.Phase = corev1.PodPending + ExpectApplied(ctx, env.Client, p) + ExpectReconcileSucceeded(ctx, podController, client.ObjectKeyFromObject(p)) + _, found := FindMetricWithLabelValues("karpenter_pods_current_unbound_time_seconds", map[string]string{ + "name": p.GetName(), + "namespace": p.GetNamespace(), + }) + Expect(found).To(BeTrue()) + _, found = FindMetricWithLabelValues("karpenter_pods_unstarted_time_seconds", map[string]string{ + "name": p.GetName(), + "namespace": p.GetNamespace(), + }) + Expect(found).To(BeTrue()) + + ExpectDeleted(ctx, env.Client, p) + ExpectReconcileSucceeded(ctx, podController, client.ObjectKeyFromObject(p)) + _, found = FindMetricWithLabelValues("karpenter_pods_current_unbound_time_seconds", map[string]string{ + "name": p.GetName(), + "namespace": p.GetNamespace(), + }) + Expect(found).To(BeFalse()) + _, found = FindMetricWithLabelValues("karpenter_pods_unstarted_time_seconds", map[string]string{ + "name": p.GetName(), + "namespace": p.GetNamespace(), + }) + Expect(found).To(BeFalse()) + }) It("should delete the pod state metric on pod delete", func() { p := test.Pod() ExpectApplied(ctx, env.Client, p)