Skip to content

Commit

Permalink
UPSTREAM: <carry>: disable etcd readiness checks by default
Browse files Browse the repository at this point in the history
Explicitly exclude etcd and etcd-readiness checks (OCPBUGS-48177)
and have etcd operator take responsibility for properly reporting etcd readiness.
Justification: kube-apiserver instances get removed from a load balancer when etcd starts
to report not ready (as will KA's /readyz). Client connections can withstand etcd unreadiness
longer than the readiness timeout is. Thus, it is not necessary to drop connections
in case etcd resumes its readiness before a client connection times out naturally.
This is a downstream patch only as OpenShift's way of using etcd is unique.
  • Loading branch information
ingvagabund committed Jan 22, 2025
1 parent 4b2db1e commit 1b40bbf
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 6 deletions.
18 changes: 16 additions & 2 deletions staging/src/k8s.io/apiserver/pkg/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -574,13 +574,27 @@ type CompletedConfig struct {
func (c *Config) AddHealthChecks(healthChecks ...healthz.HealthChecker) {
c.HealthzChecks = append(c.HealthzChecks, healthChecks...)
c.LivezChecks = append(c.LivezChecks, healthChecks...)
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
c.AddReadyzChecks(healthChecks...)
}

// AddReadyzChecks adds a health check to our config to be exposed by the readyz endpoint
// of our configured apiserver.
func (c *Config) AddReadyzChecks(healthChecks ...healthz.HealthChecker) {
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
// Info(ingvagabund): Explicitly exclude etcd and etcd-readiness checks (OCPBUGS-48177)
// and have etcd operator take responsibility for properly reporting etcd readiness.
// Justification: kube-apiserver instances get removed from a load balancer when etcd starts
// to report not ready (as will KA's /readyz). Client connections can withstand etcd unreadiness
// longer than the readiness timeout is. Thus, it is not necessary to drop connections
// in case etcd resumes its readiness before a client connection times out naturally.
// This is a downstream patch only as OpenShift's way of using etcd is unique.
readyzChecks := []healthz.HealthChecker{}
for _, check := range healthChecks {
if check.Name() == "etcd" || check.Name() == "etcd-readiness" {
continue
}
readyzChecks = append(readyzChecks, check)
}
c.ReadyzChecks = append(c.ReadyzChecks, readyzChecks...)
}

// AddPostStartHook allows you to add a PostStartHook that will later be added to the server itself in a New call.
Expand Down
2 changes: 1 addition & 1 deletion staging/src/k8s.io/apiserver/pkg/server/options/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ func (s *EtcdOptions) maybeApplyResourceTransformers(c *server.Config) (err erro

func addHealthChecksWithoutLivez(c *server.Config, healthChecks ...healthz.HealthChecker) {
c.HealthzChecks = append(c.HealthzChecks, healthChecks...)
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
c.AddReadyzChecks(healthChecks...)
}

func (s *EtcdOptions) addEtcdHealthEndpoint(c *server.Config) error {
Expand Down
22 changes: 20 additions & 2 deletions staging/src/k8s.io/apiserver/pkg/server/options/etcd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,16 @@ func TestKMSHealthzEndpoint(t *testing.T) {
}

healthChecksAreEqual(t, tc.wantHealthzChecks, serverConfig.HealthzChecks, "healthz")
healthChecksAreEqual(t, tc.wantReadyzChecks, serverConfig.ReadyzChecks, "readyz")
// Remove the excluded checks here to reduce the carry patch changes in case
// the changes drifts too much during rebases and similar scope-like changes.
wantReadyzChecks := []string{}
for _, checkName := range tc.wantReadyzChecks {
if checkName == "etcd" || checkName == "etcd-readiness" {
continue
}
wantReadyzChecks = append(wantReadyzChecks, checkName)
}
healthChecksAreEqual(t, wantReadyzChecks, serverConfig.ReadyzChecks, "readyz")
healthChecksAreEqual(t, tc.wantLivezChecks, serverConfig.LivezChecks, "livez")
})
}
Expand Down Expand Up @@ -407,7 +416,16 @@ func TestReadinessCheck(t *testing.T) {
t.Fatalf("Failed to add healthz error: %v", err)
}

healthChecksAreEqual(t, tc.wantReadyzChecks, serverConfig.ReadyzChecks, "readyz")
// Remove the excluded checks here to reduce the carry patch changes in case
// the changes drifts too much during rebases and similar scope-like changes.
wantReadyzChecks := []string{}
for _, checkName := range tc.wantReadyzChecks {
if checkName == "etcd" || checkName == "etcd-readiness" {
continue
}
wantReadyzChecks = append(wantReadyzChecks, checkName)
}
healthChecksAreEqual(t, wantReadyzChecks, serverConfig.ReadyzChecks, "readyz")
healthChecksAreEqual(t, tc.wantHealthzChecks, serverConfig.HealthzChecks, "healthz")
healthChecksAreEqual(t, tc.wantLivezChecks, serverConfig.LivezChecks, "livez")
})
Expand Down
1 change: 0 additions & 1 deletion test/e2e/apimachinery/health_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ var (
requiredReadyzChecks = sets.NewString(
"[+]ping ok",
"[+]log ok",
"[+]etcd ok",
"[+]informer-sync ok",
"[+]poststarthook/start-apiserver-admission-initializer ok",
"[+]poststarthook/generic-apiserver-start-informers ok",
Expand Down

0 comments on commit 1b40bbf

Please sign in to comment.