fix: don't reload cache on every listener fail #3620

Merged · 2 commits · Jun 26, 2024
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,10 @@ This project adheres to [Semantic Versioning](http://semver.org/).

- #3558, Add the `admin-server-host` config to set the host for the admin server - @develop7

### Fixed

- #3147, Don't reload schema cache on every listener failure - @steve-chavez

### Changed

- #2052, Dropped support for PostgreSQL 9.6 - @wolfgangwalther
11 changes: 11 additions & 0 deletions docs/references/listener.rst
@@ -47,3 +47,14 @@ This will cause the :ref:`connection_pool` to connect to the read replica host a
.. note::

Under the hood, PostgREST forces `target_session_attrs=read-write <https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-TARGET-SESSION-ATTRS>`_ for the ``LISTEN`` session.

.. _listener_automatic_recovery:

Automatic Recovery
==================

If the database connection is lost, the listener will retry connecting.

- It retries forever with exponential backoff, with a maximum backoff time of 32 seconds between retries. Each of these attempts is :ref:`logged <pgrst_logging>`.
- Automatic recovery can be disabled by setting :ref:`db-pool-automatic-recovery` to ``false``.
- To ensure a valid state, the listener reloads the :ref:`schema_cache` and :ref:`configuration` when recovering.
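The backoff schedule described above can be sketched as a small pure function. This is an illustrative model only, assuming the delay doubles from 1 second up to the 32-second cap; `backoffSeconds` is a hypothetical helper name, not PostgREST's API.

```haskell
module Main where

-- Sketch (not PostgREST's actual code) of the capped exponential backoff
-- described above: 1s, 2s, 4s, ... capped at 32s between retries.
backoffSeconds :: Int -> Int
backoffSeconds attempt = min 32 (2 ^ attempt)

main :: IO ()
main = print (map backoffSeconds [0 .. 6])
-- prints [1,2,4,8,16,32,32]
```

After the cap is reached, every further retry waits the same 32 seconds; the real listener resets the delay back to 1 once a connection succeeds.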
27 changes: 0 additions & 27 deletions src/PostgREST/AppState.hs
@@ -29,7 +29,6 @@ module PostgREST.AppState
, getObserver
, isLoaded
, isPending
, waitForListenerCanStart
) where

import qualified Data.Aeson as JSON
@@ -99,8 +98,6 @@ data AppState = AppState
, stateIsListenerOn :: IORef Bool
-- | starts the connection worker with a debounce
, debouncedConnectionWorker :: IO ()
-- | Binary semaphore used to sync the listener with the connectionWorker.
, stateListenerCanStart :: MVar ()
-- | Config that can change at runtime
, stateConf :: IORef AppConfig
-- | Time used for verifying JWT expiration
@@ -159,7 +156,6 @@ initWithPool (sock, adminSock) pool conf loggerState metricsState observer = do
<*> newIORef ConnPending
<*> newIORef False
<*> pure (pure ())
<*> newEmptyMVar
<*> newIORef conf
<*> mkAutoUpdate defaultUpdateSettings { updateAction = getCurrentTime }
<*> myThreadId
@@ -322,26 +318,6 @@ getNextListenerDelay = readIORef . stateNextListenerDelay
putNextListenerDelay :: AppState -> Int -> IO ()
putNextListenerDelay = atomicWriteIORef . stateNextListenerDelay

--------------------------------------------------------------------------------------
-------------------------------------------IMPORTANT----------------------------------
--------------------------------------------------------------------------------------
-- Both of these functions ensure there are no parallel connection attempts between the listener and the connection pool.
-- Parallel attempts raised an error with GSSAPI, as discussed on https://github.com/PostgREST/postgrest/issues/3569.
-- Until the root cause is found and solved, we need to prevent parallel connection attempts.

-- tryPutMVar doesn't block the thread. It should always succeed, since
-- the connectionWorker is the only MVar producer.
setListenerCanStart :: AppState -> IO ()
setListenerCanStart appState = void $ tryPutMVar (stateListenerCanStart appState) ()

-- | As this IO action uses `takeMVar` internally, it will only return once
-- `stateListenerCanStart` has been set using `setListenerCanStart`.
waitForListenerCanStart :: AppState -> IO ()
waitForListenerCanStart = takeMVar . stateListenerCanStart

--------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------
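The removed synchronization above is a standard binary-semaphore handshake on an `MVar`. A self-contained sketch of the pattern, with illustrative names (this is not PostgREST's module layout):

```haskell
module Main where

import Control.Concurrent (forkIO, threadDelay)
import Control.Concurrent.MVar (newEmptyMVar, takeMVar, tryPutMVar)
import Control.Monad (void)

-- The "connection worker" signals readiness with tryPutMVar, which never
-- blocks (a second call on a full MVar just returns False). The "listener"
-- blocks on takeMVar until that signal arrives, so the two never attempt
-- connections in parallel.
main :: IO ()
main = do
  canStart <- newEmptyMVar
  void $ forkIO $ do
    threadDelay 10000              -- stand-in for the worker's connection setup
    void $ tryPutMVar canStart ()  -- signal the listener; non-blocking
  takeMVar canStart                -- the listener waits here until signaled
  putStrLn "listener can start"
```

The PR deletes this handshake because, with the fix, the listener no longer races the connection pool on every failure.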

getConfig :: AppState -> IO AppConfig
getConfig = readIORef . stateConf

@@ -461,9 +437,6 @@ internalConnectionWorker appState@AppState{stateObserver=observer, stateMainThre
observer $ ExitUnsupportedPgVersion actualPgVersion minimumPgVersion
killThread mainThreadId
observer (DBConnectedObs $ pgvFullName actualPgVersion)
-- Wake up the Listener
when configDbChannelEnabled $
setListenerCanStart appState
-- this could fail because the connection drops, but loadSchemaCache will catch the error and retry.
-- We cannot retry immediately after it fails, because db-pre-config could contain user errors. We just log the error and continue.
when configDbConfig $ reReadConfig False appState
8 changes: 2 additions & 6 deletions src/PostgREST/Listener.hs
@@ -27,16 +27,10 @@
-- | Starts a LISTEN connection and handles notifications. If the LISTEN connection is lost, it recovers with exponential backoff capped at 32 seconds.
retryingListen :: AppState -> IO ()
retryingListen appState = do
AppState.waitForListenerCanStart appState
AppConfig{..} <- AppState.getConfig appState
let
dbChannel = toS configDbChannel
handleFinally err = do
-- assume we lost notifications; call the connection worker, which will also reload the schema cache
-- and will setListenerCanStart again
-- TODO: When the connection error is only on the Listener, it's wasteful to call the connectionWorker every time.
AppState.connectionWorker appState

AppState.putIsListenerOn appState False
observer $ DBListenFail dbChannel (Right err)
unless configDbPoolAutomaticRecovery $
@@ -60,6 +54,8 @@

delay <- AppState.getNextListenerDelay appState
when (delay > 1) $ do -- if we did a retry
-- assume we lost notifications; call the connection worker, which will also reload the schema cache
AppState.connectionWorker appState

Codecov (codecov/patch) warning: added line src/PostgREST/Listener.hs#L58 was not covered by tests.
-- reset the delay
AppState.putNextListenerDelay appState 1
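The PR's core change is visible in this hunk: the schema-cache reload moves from the failure handler into the success path, guarded by `delay > 1`. A pure model of that control flow, with hypothetical names and the assumption that the delay starts at 1 and doubles up to 32 on each failure:

```haskell
module Main where

-- Illustrative model (not PostgREST's code) of the fix: reload only after a
-- successful LISTEN that followed at least one failed attempt (delay grew
-- past its initial value of 1), instead of reloading on every failure.
data Listener = Listener { nextDelay :: Int, reloads :: Int } deriving (Show)

onFailure :: Listener -> Listener
onFailure l = l { nextDelay = min 32 (nextDelay l * 2) }

onSuccess :: Listener -> Listener
onSuccess l
  | nextDelay l > 1 = l { reloads = reloads l + 1, nextDelay = 1 }  -- we retried: reload
  | otherwise       = l                                             -- first connect: no reload

main :: IO ()
main = print (onSuccess (onFailure (onFailure (Listener 1 0))))
-- two failures, then a success => exactly one reload
```

Before the fix, each failure triggered a reload attempt; after it, any number of consecutive failures produces a single reload once the connection comes back.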
