From c26fb83631d8b162fcc8f60f76ac2eeb1403d03b Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 19 Aug 2024 17:03:24 -0700 Subject: [PATCH] Seed remote butler factory with default-disabled datastore cache manager --- .../daf/butler/datastore/cache_manager.py | 29 +++++++++++++++++-- .../lsst/daf/butler/remote_butler/_factory.py | 4 +++ .../butler/remote_butler/_remote_butler.py | 11 ++++--- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/python/lsst/daf/butler/datastore/cache_manager.py b/python/lsst/daf/butler/datastore/cache_manager.py index 16eeb82c22..42af4d1982 100644 --- a/python/lsst/daf/butler/datastore/cache_manager.py +++ b/python/lsst/daf/butler/datastore/cache_manager.py @@ -49,12 +49,12 @@ from collections import defaultdict from collections.abc import ItemsView, Iterable, Iterator, KeysView, ValuesView from random import Random -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from lsst.resources import ResourcePath from pydantic import BaseModel, PrivateAttr -from .._config import ConfigSubset +from .._config import Config, ConfigSubset from .._config_support import processLookupConfigs from .._dataset_ref import DatasetId, DatasetRef @@ -602,6 +602,31 @@ def cache_size(self) -> int: def file_count(self) -> int: return len(self._cache_entries) + @classmethod + def create_disabled(cls) -> Self: + """Create an instance that is disabled by default but can be + overridden by the environment. + + Returns + ------- + cache_manager : `DatastoreCacheManager` + A new cache manager that is disabled by default but may be + enabled through environment variables.
+ """ + config_str = """ +cached: + cacheable: + irrelevant: false + expiry: + mode: disabled + threshold: 0 + """ + config = Config.fromYaml(config_str) + return cls( + DatastoreCacheManagerConfig(config), + universe=None, # type: ignore + ) + @classmethod def set_fallback_cache_directory_if_unset(cls) -> tuple[bool, str]: """Define a fallback cache directory if a fallback not set already. diff --git a/python/lsst/daf/butler/remote_butler/_factory.py b/python/lsst/daf/butler/remote_butler/_factory.py index 9270f154ed..a4d7f15ca5 100644 --- a/python/lsst/daf/butler/remote_butler/_factory.py +++ b/python/lsst/daf/butler/remote_butler/_factory.py @@ -34,6 +34,7 @@ from .._butler_config import ButlerConfig from .._butler_instance_options import ButlerInstanceOptions +from ..datastore.cache_manager import DatastoreCacheManager from ._authentication import get_authentication_token_from_environment from ._config import RemoteButlerConfigModel from ._http_connection import RemoteButlerHttpConnection @@ -91,12 +92,15 @@ def create_butler_for_access_token( ) -> RemoteButler: if butler_options is None: butler_options = ButlerInstanceOptions() + # Use a disabled datastore cache. Environment variables can be + # set to override. 
return RemoteButler( connection=RemoteButlerHttpConnection( http_client=self.http_client, server_url=self.server_url, access_token=access_token ), options=butler_options, cache=self._cache, + datastore_cache_manager=DatastoreCacheManager.create_disabled(), ) def create_butler_with_credentials_from_environment( diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index d4631c816f..e09b4a58e9 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -98,6 +98,9 @@ class RemoteButler(Butler): # numpydoc ignore=PR02 cache : `RemoteButlerCache` Cache of data shared between multiple RemoteButler instances connected to the same server. + datastore_cache_manager : `AbstractDatastoreCacheManager` or `None` + Datastore cache manager to use. If `None`, a per-instance cache + manager is created lazily from the default datastore configuration. Notes ----- @@ -125,13 +128,14 @@ def __new__( connection: RemoteButlerHttpConnection, options: ButlerInstanceOptions, cache: RemoteButlerCache, + datastore_cache_manager: AbstractDatastoreCacheManager | None = None, ) -> RemoteButler: self = cast(RemoteButler, super().__new__(cls)) self.storageClasses = StorageClassFactory() self._connection = connection self._cache = cache - self._datastore_cache_manager = None + self._datastore_cache_manager = datastore_cache_manager # Avoid a circular import by deferring this import. from ._registry import RemoteButlerRegistry @@ -179,7 +183,7 @@ def dimensions(self) -> DimensionUniverse: return cache.dimensions @property - def datastore_cache_manager(self) -> AbstractDatastoreCacheManager: + def _cache_manager(self) -> AbstractDatastoreCacheManager: """Cache manager to use when reading files from the butler.""" # RemoteButler does not get any cache configuration from the server.
# Read the Datastore default config (which is a FileDatastore) @@ -188,7 +192,6 @@ def datastore_cache_manager(self) -> AbstractDatastoreCacheManager: # defaults for DatastoreConfig no longer include the cache. if self._datastore_cache_manager is None: datastore_config = DatastoreConfig() - self._datastore_cache_manager: AbstractDatastoreCacheManager if "cached" in datastore_config: self._datastore_cache_manager = DatastoreCacheManager( datastore_config["cached"], universe=self.dimensions @@ -279,7 +282,7 @@ def _get_dataset_as_python_object( ref, _to_file_payload(model), parameters=parameters, - cache_manager=self.datastore_cache_manager, + cache_manager=self._cache_manager, ) def _get_file_info(