From 02ecd0ba36a1c62283c19ee108f2cbb42523ecfe Mon Sep 17 00:00:00 2001 From: Andreas Auernhammer Date: Tue, 31 Oct 2023 11:08:53 +0100 Subject: [PATCH] honor offline caching in `/v1/status` API Now, the `/v1/status` API will not try fetch the backend keystore status if: - offline caching is enabled. - the keystore is considered offline. The idea here is that if the cache considered the backend keystore as offline then it is unlikely that a `Status` suceeds. Instead, we can directly return unreachable. Once, the keystore becomes available again, the cache will detect it eventually (on its next health check cycle). Among other things, this improves pod availability when `/v1/status` is used as (part of) readiness probes. (i.e. MinIO) Such probes are now less likely to time out since `/v1/status` returns immediately. However, if the backend keystore becomes unavailable and a `/v1/status` request arrives BEFORE the cache detects that the keystore is offline then these requests may time out. Hence, it is recommended to use readiness probes with a retry threshold > 1. Signed-off-by: Andreas Auernhammer --- internal/api/response.go | 1 - keystore.go | 7 +++++++ server.go | 25 ++++++++++++------------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/internal/api/response.go b/internal/api/response.go index 7b517edb..57d4cd3d 100644 --- a/internal/api/response.go +++ b/internal/api/response.go @@ -26,7 +26,6 @@ type StatusResponse struct { StackAlloc uint64 `json:"mem_stack_used"` KeyStoreLatency int64 `json:"keystore_latency,omitempty"` // In microseconds - KeyStoreUnavailable bool `json:"keystore_unavailable,omitempty"` KeyStoreUnreachable bool `json:"keystore_unreachable,omitempty"` } diff --git a/keystore.go b/keystore.go index f48e0bf6..ba3dccdd 100644 --- a/keystore.go +++ b/keystore.go @@ -16,6 +16,7 @@ import ( "github.com/minio/kes-go" "github.com/minio/kes/internal/cache" "github.com/minio/kes/internal/key" + "github.com/minio/kes/kv" ) // A KeyStore stores key-value pairs. It provides durable storage for a @@ -238,7 +239,13 @@ type cacheEntry struct { } // Status returns the current state of the underlying KeyStore. +// +// It immediately returns an error if the backend keystore is not +// reachable and offline caching is enabled. func (c *keyCache) Status(ctx context.Context) (KeyStoreState, error) { + if c.offline.Load() { + return KeyStoreState{}, &kv.Unreachable{Err: errors.New("keystore is offline")} + } return c.store.Status(ctx) } diff --git a/server.go b/server.go index 4dd73382..5b3de36f 100644 --- a/server.go +++ b/server.go @@ -477,27 +477,27 @@ func (s *Server) ready(resp *api.Response, req *api.Request) { } func (s *Server) status(resp *api.Response, req *api.Request) { + info, err := sys.ReadBinaryInfo() + if err != nil { + s.state.Load().Log.ErrorContext(req.Context(), err.Error(), "req", req) + resp.Fail(http.StatusInternalServerError, "failed to read server version") + return + } + var ( - unavailable, unreachable bool - latency time.Duration + latency time.Duration + unreachable = true ) state, err := s.state.Load().Keys.Status(req.Context()) - if err != nil { - unavailable = true - _, unreachable = kv.IsUnreachable(err) - } else { + if err == nil { + unreachable = false latency = state.Latency.Round(time.Millisecond) + if latency == 0 { // Make sure we actually send a latency even if the key store respond time is < 1ms. latency = 1 * time.Millisecond } } - info, err := sys.ReadBinaryInfo() - if err != nil { - s.state.Load().Log.ErrorContext(req.Context(), err.Error(), "req", req) - resp.Fail(http.StatusInternalServerError, "failed to read server version") - return - } var memStats runtime.MemStats runtime.ReadMemStats(&memStats) @@ -513,7 +513,6 @@ func (s *Server) status(resp *api.Response, req *api.Request) { StackAlloc: memStats.StackSys, KeyStoreLatency: latency.Milliseconds(), - KeyStoreUnavailable: unavailable, KeyStoreUnreachable: unreachable, }) }