diff --git a/doc/changes/DM-40120.api.rst b/doc/changes/DM-40120.api.rst new file mode 100644 index 0000000000..74992746b5 --- /dev/null +++ b/doc/changes/DM-40120.api.rst @@ -0,0 +1,2 @@ +Added new parameter ``without_datastore`` to the ``Butler`` and ``ButlerConfig`` constructors to allow a butler to be created that can not access a datastore. +This can be helpful if you want to query registry without requiring the overhead of the datastore. diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 2b04691631..36c90d3b4c 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -69,6 +69,7 @@ DimensionRecord, DimensionUniverse, FileDataset, + NullDatastore, Progress, StorageClass, StorageClassFactory, @@ -149,6 +150,9 @@ class Butler(LimitedButler): the default for that dimension. Nonexistent collections are ignored. If a default value is provided explicitly for a governor dimension via ``**kwargs``, no default will be inferred for that dimension. + without_datastore : `bool`, optional + If `True` do not attach a datastore to this butler. Any attempts + to use a datastore will fail. **kwargs : `str` Default data ID key-value pairs. These may only identify "governor" dimensions like ``instrument`` and ``skymap``. 
@@ -203,6 +207,7 @@ def __init__( searchPaths: Sequence[ResourcePathExpression] | None = None, writeable: bool | None = None, inferDefaults: bool = True, + without_datastore: bool = False, **kwargs: str, ): defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs) @@ -217,7 +222,7 @@ def __init__( self.storageClasses = butler.storageClasses self._config: ButlerConfig = butler._config else: - self._config = ButlerConfig(config, searchPaths=searchPaths) + self._config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore) try: if "root" in self._config: butlerRoot = self._config["root"] @@ -228,9 +233,12 @@ def __init__( self._registry = _RegistryFactory(self._config).from_config( butlerRoot=butlerRoot, writeable=writeable, defaults=defaults ) - self._datastore = Datastore.fromConfig( - self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot - ) + if without_datastore: + self._datastore = NullDatastore(None, None) + else: + self._datastore = Datastore.fromConfig( + self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot + ) self.storageClasses = StorageClassFactory() self.storageClasses.addFromConfig(self._config) except Exception: diff --git a/python/lsst/daf/butler/_butlerConfig.py b/python/lsst/daf/butler/_butlerConfig.py index 6b2f17800e..30dc923a2f 100644 --- a/python/lsst/daf/butler/_butlerConfig.py +++ b/python/lsst/daf/butler/_butlerConfig.py @@ -61,12 +61,15 @@ class ButlerConfig(Config): than those read from the environment in `ConfigSubset.defaultSearchPaths()`. They are only read if ``other`` refers to a configuration file or directory. + without_datastore : `bool`, optional + If `True` remove the datastore configuration. 
""" def __init__( self, other: ResourcePathExpression | Config | None = None, searchPaths: Sequence[ResourcePathExpression] | None = None, + without_datastore: bool = False, ): self.configDir: ResourcePath | None = None @@ -155,6 +158,13 @@ def __init__( # configuration classes. We ask each of them to apply defaults to # the values we have been supplied by the user. for configClass in CONFIG_COMPONENT_CLASSES: + assert configClass.component is not None, "Config class component cannot be None" + + if without_datastore and configClass is DatastoreConfig: + if configClass.component in butlerConfig: + del butlerConfig[configClass.component] + continue + # Only send the parent config if the child # config component is present (otherwise it assumes that the # keys from other components are part of the child) @@ -163,7 +173,6 @@ def __init__( localOverrides = butlerConfig config = configClass(localOverrides, searchPaths=searchPaths) # Re-attach it using the global namespace - assert configClass.component is not None, "Config class component cannot be None" self.update({configClass.component: config}) # Remove the key from the butlerConfig since we have already # merged that information. 
diff --git a/python/lsst/daf/butler/core/datastore.py b/python/lsst/daf/butler/core/datastore.py index 0144f69fb1..2123e672ec 100644 --- a/python/lsst/daf/butler/core/datastore.py +++ b/python/lsst/daf/butler/core/datastore.py @@ -23,11 +23,12 @@ from __future__ import annotations -__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs") +__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs", "NullDatastore") import contextlib import dataclasses import logging +import time from abc import ABCMeta, abstractmethod from collections import abc, defaultdict from collections.abc import Callable, Iterable, Iterator, Mapping @@ -50,6 +51,8 @@ from .datastoreRecordData import DatastoreRecordData from .storageClass import StorageClass +_LOG = logging.getLogger(__name__) + class DatastoreConfig(ConfigSubset): """Configuration for Datastores.""" @@ -1205,3 +1208,122 @@ def set_retrieve_dataset_type_method(self, method: Callable[[str], DatasetType | guess dataset location based on its stored dataset type. """ pass + + +class NullDatastore(Datastore): + """A datastore that implements the `Datastore` API but raises an error + for any request that would store or retrieve data. + """ + + @classmethod + def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: + # Nothing to do. This is not a real Datastore. + pass + + def __init__( + self, + config: Config | ResourcePathExpression | None, + bridgeManager: DatastoreRegistryBridgeManager | None, + butlerRoot: ResourcePathExpression | None = None, + ): + # Name ourselves with the timestamp at which the datastore + # was created.
+ self.name = f"{type(self).__name__}@{time.time()}" + _LOG.debug("Creating datastore %s", self.name) + + return + + def knows(self, ref: DatasetRef) -> bool: + return False + + def exists(self, datasetRef: DatasetRef) -> bool: + return False + + def get( + self, + datasetRef: DatasetRef, + parameters: Mapping[str, Any] | None = None, + storageClass: StorageClass | str | None = None, + ) -> Any: + raise FileNotFoundError("This is a no-op datastore that can not access a real datastore") + + def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def ingest( + self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True + ) -> None: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def transfer_from( + self, + source_datastore: Datastore, + refs: Iterable[DatasetRef], + transfer: str = "auto", + artifact_existence: dict[ResourcePath, bool] | None = None, + ) -> tuple[set[DatasetRef], set[DatasetRef]]: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs: + raise FileNotFoundError("This is a no-op datastore that can not access a real datastore") + + def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath: + raise FileNotFoundError("This is a no-op datastore that can not access a real datastore") + + def retrieveArtifacts( + self, + refs: Iterable[DatasetRef], + destination: ResourcePath, + transfer: str = "auto", + preserve_path: bool = True, + overwrite: bool = False, + ) -> list[ResourcePath]: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def remove(self, datasetRef: DatasetRef) -> None: + raise NotImplementedError("This is a no-op datastore that can not access a real 
datastore") + + def forget(self, refs: Iterable[DatasetRef]) -> None: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def emptyTrash(self, ignore_errors: bool = True) -> None: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def export( + self, + refs: Iterable[DatasetRef], + *, + directory: ResourcePathExpression | None = None, + transfer: str | None = "auto", + ) -> Iterable[FileDataset]: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def validateConfiguration( + self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False + ) -> None: + # No configuration so always validates. 
+ pass + + def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None: + pass + + def getLookupKeys(self) -> set[LookupKey]: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def import_records( + self, + data: Mapping[str, DatastoreRecordData], + ) -> None: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") + + def export_records( + self, + refs: Iterable[DatasetIdRef], + ) -> Mapping[str, DatastoreRecordData]: + raise NotImplementedError("This is a no-op datastore that can not access a real datastore") diff --git a/python/lsst/daf/butler/script/certifyCalibrations.py b/python/lsst/daf/butler/script/certifyCalibrations.py index 2723b1cf96..f11d3c0c32 100644 --- a/python/lsst/daf/butler/script/certifyCalibrations.py +++ b/python/lsst/daf/butler/script/certifyCalibrations.py @@ -63,7 +63,7 @@ def certifyCalibrations( Search all children of the inputCollection if it is a CHAINED collection, instead of just the most recent one. """ - butler = Butler(repo, writeable=True) + butler = Butler(repo, writeable=True, without_datastore=True) registry = butler.registry timespan = Timespan( begin=astropy.time.Time(begin_date, scale="tai") if begin_date is not None else None, diff --git a/python/lsst/daf/butler/script/collectionChain.py b/python/lsst/daf/butler/script/collectionChain.py index d4a88204c8..0b8cd429ea 100644 --- a/python/lsst/daf/butler/script/collectionChain.py +++ b/python/lsst/daf/butler/script/collectionChain.py @@ -65,7 +65,7 @@ def collectionChain( chain : `tuple` of `str` The collections in the chain following this command. """ - butler = Butler(repo, writeable=True) + butler = Butler(repo, writeable=True, without_datastore=True) # Every mode needs children except pop. 
if not children and mode != "pop": diff --git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py index 00276aa69f..2862516b84 100644 --- a/python/lsst/daf/butler/script/queryCollections.py +++ b/python/lsst/daf/butler/script/queryCollections.py @@ -134,7 +134,7 @@ def _getTree( names=("Name", "Type"), dtype=(str, str), ) - butler = Butler(repo) + butler = Butler(repo, without_datastore=True) def addCollection(name: str, level: int = 0) -> None: collectionType = butler.registry.getCollectionType(name) diff --git a/python/lsst/daf/butler/script/queryDataIds.py b/python/lsst/daf/butler/script/queryDataIds.py index 44ad689bef..483f1d7795 100644 --- a/python/lsst/daf/butler/script/queryDataIds.py +++ b/python/lsst/daf/butler/script/queryDataIds.py @@ -103,7 +103,7 @@ def queryDataIds( Docstring for supported parameters is the same as `~lsst.daf.butler.Registry.queryDataIds`. """ - butler = Butler(repo) + butler = Butler(repo, without_datastore=True) if datasets and collections and not dimensions: # Determine the dimensions relevant to all given dataset types. diff --git a/python/lsst/daf/butler/script/queryDatasetTypes.py b/python/lsst/daf/butler/script/queryDatasetTypes.py index 6cf28777ae..0ce3715a6e 100644 --- a/python/lsst/daf/butler/script/queryDatasetTypes.py +++ b/python/lsst/daf/butler/script/queryDatasetTypes.py @@ -55,7 +55,7 @@ def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str], components: A dict whose key is "datasetTypes" and whose value is a list of collection names. """ - butler = Butler(repo) + butler = Butler(repo, without_datastore=True) expression = glob if glob else ... 
datasetTypes = butler.registry.queryDatasetTypes(components=components, expression=expression) if verbose: diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py index 70d114f502..d965f0c6c2 100644 --- a/python/lsst/daf/butler/script/queryDatasets.py +++ b/python/lsst/daf/butler/script/queryDatasets.py @@ -167,7 +167,9 @@ def __init__( ): if (repo and butler) or (not repo and not butler): raise RuntimeError("One of repo and butler must be provided and the other must be None.") - self.butler = butler or Butler(repo) + # show_uri requires a datastore. + without_datastore = False if show_uri else True + self.butler = butler or Butler(repo, without_datastore=without_datastore) self._getDatasets(glob, collections, where, find_first) self.showUri = show_uri diff --git a/python/lsst/daf/butler/script/queryDimensionRecords.py b/python/lsst/daf/butler/script/queryDimensionRecords.py index 868986e21a..5cbc2a6fff 100644 --- a/python/lsst/daf/butler/script/queryDimensionRecords.py +++ b/python/lsst/daf/butler/script/queryDimensionRecords.py @@ -48,7 +48,7 @@ def queryDimensionRecords( `~lsst.daf.butler.Registry.queryDimensionRecords` except for ``no_check``, which is the inverse of ``check``. """ - butler = Butler(repo) + butler = Butler(repo, without_datastore=True) query_collections: Iterable[str] | EllipsisType | None = None if datasets: diff --git a/python/lsst/daf/butler/script/register_dataset_type.py b/python/lsst/daf/butler/script/register_dataset_type.py index bf297132b6..75a176caed 100644 --- a/python/lsst/daf/butler/script/register_dataset_type.py +++ b/python/lsst/daf/butler/script/register_dataset_type.py @@ -63,7 +63,7 @@ def register_dataset_type( be created by this command. They are always derived from the composite dataset type. 
""" - butler = Butler(repo, writeable=True) + butler = Butler(repo, writeable=True, without_datastore=True) composite, component = DatasetType.splitDatasetTypeName(dataset_type) if component: diff --git a/python/lsst/daf/butler/script/removeDatasetType.py b/python/lsst/daf/butler/script/removeDatasetType.py index 00b44f01d6..eee60cab8b 100644 --- a/python/lsst/daf/butler/script/removeDatasetType.py +++ b/python/lsst/daf/butler/script/removeDatasetType.py @@ -37,5 +37,5 @@ def removeDatasetType(repo: str, dataset_type_name: tuple[str, ...]) -> None: datasetTypeName : `str` The name of the dataset type to be removed. """ - butler = Butler(repo, writeable=True) + butler = Butler(repo, writeable=True, without_datastore=True) butler.registry.removeDatasetType(dataset_type_name) diff --git a/tests/test_butler.py b/tests/test_butler.py index 433c692a63..728d432215 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -76,6 +76,7 @@ def mock_s3(*args: Any, **kwargs: Any) -> Any: # type: ignore[no-untyped-def] FileDataset, FileTemplate, FileTemplateValidationError, + NullDatastore, StorageClassFactory, ValidationError, script, @@ -2332,6 +2333,56 @@ class ChainedDatastoreTransfers(PosixDatastoreTransfers): configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") +class NullDatastoreTestCase(unittest.TestCase): + """Test that we can fall back to a null datastore.""" + + # Need a good config to create the repo. 
+ configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") + + @classmethod + def setUpClass(cls) -> None: + cls.storageClassFactory = StorageClassFactory() + cls.storageClassFactory.addFromConfig(cls.configFile) + + def setUp(self) -> None: + """Create a new butler root for each test.""" + self.root = makeTestTempDir(TESTDIR) + Butler.makeRepo(self.root, config=Config(self.configFile)) + + def tearDown(self) -> None: + removeTestTempDir(self.root) + + def test_fallback(self) -> None: + # Read the butler config and mess with the datastore section. + bad_config = Config(os.path.join(self.root, "butler.yaml")) + bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" + + with self.assertRaises(RuntimeError): + Butler(bad_config) + + butler = Butler(bad_config, writeable=True, without_datastore=True) + self.assertIsInstance(butler._datastore, NullDatastore) + + # Check that registry is working. + butler.registry.registerRun("MYRUN") + collections = butler.registry.queryCollections(...) + self.assertIn("MYRUN", set(collections)) + + # Create a ref. + dimensions = butler.dimensions.extract([]) + storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") + datasetTypeName = "metric" + datasetType = DatasetType(datasetTypeName, dimensions, storageClass) + butler.registry.registerDatasetType(datasetType) + ref = DatasetRef(datasetType, {}, run="MYRUN") + + # Check that datastore will complain. 
+ with self.assertRaises(FileNotFoundError): + butler.get(ref) + with self.assertRaises(FileNotFoundError): + butler.getURI(ref) + + def setup_module(module: types.ModuleType) -> None: """Set up the module for pytest.""" clean_environment() diff --git a/tests/test_datastore.py b/tests/test_datastore.py index a18e13c648..de13806288 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -49,6 +49,7 @@ DatastoreValidationError, DimensionUniverse, FileDataset, + NullDatastore, StorageClass, StorageClassFactory, StoredFileInfo, @@ -1744,6 +1745,59 @@ def testCacheExpiryAge(self) -> None: self.assertIsInstance(found, ResourcePath) +class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase): + """Test the null datastore.""" + + storageClassFactory = StorageClassFactory() + + def test_basics(self) -> None: + storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") + ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) + + null = NullDatastore(None, None) + + self.assertFalse(null.exists(ref)) + self.assertFalse(null.knows(ref)) + knows = null.knows_these([ref]) + self.assertFalse(knows[ref]) + null.validateConfiguration(ref) + + with self.assertRaises(FileNotFoundError): + null.get(ref) + with self.assertRaises(NotImplementedError): + null.put("", ref) + with self.assertRaises(FileNotFoundError): + null.getURI(ref) + with self.assertRaises(FileNotFoundError): + null.getURIs(ref) + with self.assertRaises(FileNotFoundError): + null.getManyURIs([ref]) + with self.assertRaises(NotImplementedError): + null.getLookupKeys() + with self.assertRaises(NotImplementedError): + null.import_records({}) + with self.assertRaises(NotImplementedError): + null.export_records([]) + with self.assertRaises(NotImplementedError): + null.export([ref]) + with self.assertRaises(NotImplementedError): + null.transfer(null, ref) + with self.assertRaises(NotImplementedError): + null.emptyTrash() + with 
self.assertRaises(NotImplementedError): + null.trash(ref) + with self.assertRaises(NotImplementedError): + null.forget([ref]) + with self.assertRaises(NotImplementedError): + null.remove(ref) + with self.assertRaises(NotImplementedError): + null.retrieveArtifacts([ref], ResourcePath(".")) + with self.assertRaises(NotImplementedError): + null.transfer_from(null, [ref]) + with self.assertRaises(NotImplementedError): + null.ingest() + + class DatasetRefURIsTestCase(unittest.TestCase): """Tests for DatasetRefURIs."""