diff --git a/doc/changes/DM-46599.removal.md b/doc/changes/DM-46599.removal.md new file mode 100644 index 0000000000..ef83ace4b2 --- /dev/null +++ b/doc/changes/DM-46599.removal.md @@ -0,0 +1,19 @@ +Regular expressions in collection and dataset type patterns are now deprecated. (Shell-like globs will continue to be supported.) + +Materializing dataset queries into temporary tables is now deprecated. (Materializing data ID queries will continue to be supported.) + +The `datasetType` argument to `Registry.queryCollections` is now deprecated. (This parameter has never had any effect.) + +We will soon stop raising `DataIdValueError` exceptions for typos and other bad values in query expressions like `instrument='HsC'`. Instead, these queries will return an empty iterable of results. + +Using HTM and HEALPix spatial dimensions like `htm11` or `healpix10` in data ID constraints passed to queries is now deprecated. The exception is `htm7`, which will continue to work. + +The `--no-check` parameter to `butler query-dimension-records` is now deprecated. + +The `offset` argument to `limit()` for `Registry.queryDataIds` and `Registry.queryDimensionRecords` result objects is now deprecated. + +The `--offset` option for `butler query-data-ids` and `butler query-datasets` is no longer supported, and will raise an exception if you attempt to use it. + +It will soon become mandatory to explicitly provide `--collections` and a dataset type search when calling `butler query-datasets`. + +Using `Butler.collections` to get the list of default collections is now deprecated. Use `Butler.collections.defaults` instead. diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 504dcc12c3..d39d0d14db 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -34,6 +34,7 @@ from collections.abc import Iterable, Mapping, Sequence, Set from typing import TYPE_CHECKING, Any, overload +from deprecated.sphinx import deprecated from pydantic import BaseModel from ._collection_type import CollectionType @@ -83,9 +84,21 @@ def __getitem__(self, index: int) -> str: ... @overload def __getitem__(self, index: slice) -> Sequence[str]: ... + @deprecated( + "'Butler.collections' should no longer be used to get the list of default collections." + " Use 'Butler.collections.defaults' instead. Will be removed after v28.", + version="v28", + category=FutureWarning, + ) def __getitem__(self, index: int | slice) -> str | Sequence[str]: return self.defaults[index] + @deprecated( + "'Butler.collections' should no longer be used to get the list of default collections." + " Use 'Butler.collections.defaults' instead.
Will be removed after v28.", + version="v28", + category=FutureWarning, + ) def __len__(self) -> int: return len(self.defaults) diff --git a/python/lsst/daf/butler/cli/cmd/commands.py b/python/lsst/daf/butler/cli/cmd/commands.py index 21d9819902..119be85c20 100644 --- a/python/lsst/daf/butler/cli/cmd/commands.py +++ b/python/lsst/daf/butler/cli/cmd/commands.py @@ -576,7 +576,7 @@ def query_data_ids(**kwargs: Any) -> None: def query_dimension_records(**kwargs: Any) -> None: """Query for dimension information.""" if kwargs.pop("no_check") is not None: - click.echo("WARNING: --no-check option now has no effect and is ignored.") + click.echo("WARNING: --no-check option has no effect and will be removed after v28.") table = script.queryDimensionRecords(**kwargs) if table: table.pprint_all() diff --git a/python/lsst/daf/butler/registry/queries/_results.py b/python/lsst/daf/butler/registry/queries/_results.py index f9f85d4338..c643e7ce09 100644 --- a/python/lsst/daf/butler/registry/queries/_results.py +++ b/python/lsst/daf/butler/registry/queries/_results.py @@ -39,11 +39,15 @@ ) import itertools +import warnings from abc import abstractmethod from collections.abc import Iterable, Iterator, Sequence from contextlib import AbstractContextManager, ExitStack, contextmanager +from types import EllipsisType from typing import Any, Self +from deprecated.sphinx import deprecated + from ..._dataset_ref import DatasetRef from ..._dataset_type import DatasetType from ..._exceptions_legacy import DatasetTypeError @@ -527,8 +531,14 @@ def order_by(self, *args: str) -> Self: self._query = self._query.sorted(clause.terms, defer=True) return self - def limit(self, limit: int, offset: int | None = 0) -> Self: - if offset is None: + def limit(self, limit: int, offset: int | None | EllipsisType = ...) -> Self: + if offset is not ...: + warnings.warn( + "'offset' parameter should no longer be used. It is not supported by the new query system." + " Will be removed after v28.", + FutureWarning, + ) + if offset is None or offset is ...: offset = 0 self._query = self._query.sliced(offset, offset + limit, defer=True) return self @@ -665,6 +675,11 @@ def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]: yield self @contextmanager + @deprecated( + "This method should no longer be used. Will be removed after v28.", + version="v28", + category=FutureWarning, + ) def materialize(self) -> Iterator[DatabaseParentDatasetQueryResults]: # Docstring inherited from DatasetQueryResults. with self._query.open_context(): @@ -817,9 +832,15 @@ def order_by(self, *args: str) -> Self: self._query = self._query.sorted(clause.terms, defer=True) return self - def limit(self, limit: int, offset: int | None = 0) -> Self: + def limit(self, limit: int, offset: int | None | EllipsisType = ...) -> Self: # Docstring inherited from base class. - if offset is None: + if offset is not ...: + warnings.warn( + "'offset' parameter should no longer be used. It is not supported by the new query system." 
+ " Will be removed after v28.", + FutureWarning, + ) + if offset is None or offset is ...: offset = 0 self._query = self._query.sliced(offset, offset + limit, defer=True) return self diff --git a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py index da132831f9..b3d632211b 100644 --- a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py +++ b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py @@ -28,6 +28,7 @@ __all__ = ("SqlQueryBackend",) +import warnings from collections.abc import Iterable, Mapping, Sequence, Set from contextlib import AbstractContextManager from typing import TYPE_CHECKING, Any, cast @@ -337,6 +338,11 @@ def resolve_governor_constraints( } if (constraint_values := constraints.get(dimension_name)) is not None: if not (constraint_values <= all_values): + warnings.warn( + "DataIdValueError will no longer be raised for invalid governor dimension values." + " Instead, an empty list will be returned. Will be changed after v28.", + FutureWarning, + ) raise DataIdValueError( f"Unknown values specified for governor dimension {dimension_name}: " f"{constraint_values - all_values}." diff --git a/python/lsst/daf/butler/registry/queries/_structs.py b/python/lsst/daf/butler/registry/queries/_structs.py index 8bf798ce87..0c062397de 100644 --- a/python/lsst/daf/butler/registry/queries/_structs.py +++ b/python/lsst/daf/butler/registry/queries/_structs.py @@ -29,6 +29,7 @@ __all__ = ["QuerySummary"] # other classes here are local to subpackage import dataclasses +import warnings from collections.abc import Iterable, Mapping, Set from typing import Any @@ -393,6 +394,16 @@ def __init__( self.order_by = None if order_by is None else OrderByClause.parse_general(order_by, requested) self.limit = limit self.columns_required, self.dimensions, self.region = self._compute_columns_required() + for dimension in self.where.data_id.dimensions.names: + if ( + dimension.startswith("htm") or dimension.startswith("healpix") + ) and not dimension == self.universe.commonSkyPix.name: + warnings.warn( + f"Dimension '{dimension}' should no longer be used in data IDs." + " Use the region 'OVERLAPS' operator in the where clause instead." + " Will be removed after v28.", + FutureWarning, + ) requested: DimensionGroup """Dimensions whose primary keys should be included in the result rows of diff --git a/python/lsst/daf/butler/registry/sql_registry.py b/python/lsst/daf/butler/registry/sql_registry.py index d665aa9d15..a2ad378e23 100644 --- a/python/lsst/daf/butler/registry/sql_registry.py +++ b/python/lsst/daf/butler/registry/sql_registry.py @@ -1789,6 +1789,12 @@ def queryCollections( # Right now the datasetTypes argument is completely ignored, but that # is consistent with its [lack of] guarantees. DM-24939 or a follow-up # ticket will take care of that. + if datasetType is not None: + warnings.warn( + "The datasetType parameter should no longer be used. It has" + " never had any effect. 
Will be removed after v28.", FutureWarning, ) try: wildcard = CollectionWildcard.from_expression(expression) except TypeError as exc: @@ -2421,7 +2427,7 @@ def queryDatasetAssociations( if isinstance(datasetType, str): datasetType = self.getDatasetType(datasetType) resolved_collections = self.queryCollections( - collections, datasetType, collectionTypes=collectionTypes, flattenChains=flattenChains + collections, collectionTypes=collectionTypes, flattenChains=flattenChains ) with self._query() as query: query = query.join_dataset_search(datasetType, resolved_collections) diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index 28e561cc06..4b6334fb18 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -42,6 +42,7 @@ from collections import defaultdict, namedtuple from collections.abc import Callable, Iterable, Iterator from concurrent.futures import ThreadPoolExecutor +from contextlib import ExitStack from datetime import timedelta from threading import Barrier from typing import TypeVar @@ -148,6 +149,11 @@ class RegistryTests(ABC): searches. The new one is able to search in these collections.) """ + supportsNonCommonSkypixQueries: bool = True + """True if the registry class being tested supports data ID constraints + like {'htm11': ...}. + """ + @classmethod @abstractmethod def getDataDir(cls) -> str: @@ -835,15 +841,19 @@ def testCollections(self): if self.supportsCollectionRegex: # Query for collections matching a regex. - self.assertCountEqual( - list(registry.queryCollections(re.compile("imported_."), flattenChains=False)), - ["imported_r", "imported_g"], - ) + with self.assertWarns(FutureWarning): + self.assertCountEqual( + list(registry.queryCollections(re.compile("imported_."), flattenChains=False)), + ["imported_r", "imported_g"], + ) # Query for collections matching a regex or an explicit str. - self.assertCountEqual( - list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)), - ["imported_r", "imported_g", "chain1"], - ) + with self.assertWarns(FutureWarning): + self.assertCountEqual( + list( + registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False) + ), + ["imported_r", "imported_g", "chain1"], + ) # Same queries as the regex ones above, but using globs instead of # regex. self.assertCountEqual( @@ -1388,7 +1398,8 @@ def do_query(): if self.supportsQueryGovernorValidation: # Specifying non-existing skymap is an exception with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): - do_query() + with self.assertWarns(FutureWarning): + do_query() else: # New query system returns zero rows instead of raising an # exception. @@ -1726,14 +1737,15 @@ def testQueryResults(self): # Materialize the bias dataset queries (only) by putting the results # into temporary tables, then repeat those tests.
- with subsetDataIds.findDatasets( - bias, collections=["imported_r", "imported_g"], findFirst=False - ).materialize() as biases: - self.assertCountEqual(list(biases), expectedAllBiases) - with subsetDataIds.findDatasets( - bias, collections=["imported_r", "imported_g"], findFirst=True - ).materialize() as biases: - self.assertCountEqual(list(biases), expectedDeduplicatedBiases) + with self.assertWarns(FutureWarning): + with subsetDataIds.findDatasets( + bias, collections=["imported_r", "imported_g"], findFirst=False + ).materialize() as biases: + self.assertCountEqual(list(biases), expectedAllBiases) + with subsetDataIds.findDatasets( + bias, collections=["imported_r", "imported_g"], findFirst=True + ).materialize() as biases: + self.assertCountEqual(list(biases), expectedDeduplicatedBiases) # Materialize the data ID subset query, but not the dataset queries. with subsetDataIds.materialize() as subsetDataIds: self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) @@ -1753,14 +1765,15 @@ def testQueryResults(self): expectedDeduplicatedBiases, ) # Materialize the dataset queries, too. - with subsetDataIds.findDatasets( - bias, collections=["imported_r", "imported_g"], findFirst=False - ).materialize() as biases: - self.assertCountEqual(list(biases), expectedAllBiases) - with subsetDataIds.findDatasets( - bias, collections=["imported_r", "imported_g"], findFirst=True - ).materialize() as biases: - self.assertCountEqual(list(biases), expectedDeduplicatedBiases) + with self.assertWarns(FutureWarning): + with subsetDataIds.findDatasets( + bias, collections=["imported_r", "imported_g"], findFirst=False + ).materialize() as biases: + self.assertCountEqual(list(biases), expectedAllBiases) + with subsetDataIds.findDatasets( + bias, collections=["imported_r", "imported_g"], findFirst=True + ).materialize() as biases: + self.assertCountEqual(list(biases), expectedDeduplicatedBiases) # Materialize the original query, but none of the follow-up queries. with dataIds.materialize() as dataIds: self.assertEqual(dataIds.dimensions, expected_dimensions) @@ -1792,14 +1805,15 @@ def testQueryResults(self): expectedDeduplicatedBiases, ) # Materialize just the bias dataset queries. - with subsetDataIds.findDatasets( - bias, collections=["imported_r", "imported_g"], findFirst=False - ).materialize() as biases: - self.assertCountEqual(list(biases), expectedAllBiases) - with subsetDataIds.findDatasets( - bias, collections=["imported_r", "imported_g"], findFirst=True - ).materialize() as biases: - self.assertCountEqual(list(biases), expectedDeduplicatedBiases) + with self.assertWarns(FutureWarning): + with subsetDataIds.findDatasets( + bias, collections=["imported_r", "imported_g"], findFirst=False + ).materialize() as biases: + self.assertCountEqual(list(biases), expectedAllBiases) + with subsetDataIds.findDatasets( + bias, collections=["imported_r", "imported_g"], findFirst=True + ).materialize() as biases: + self.assertCountEqual(list(biases), expectedDeduplicatedBiases) # Materialize the subset data ID query, but not the dataset # queries. with subsetDataIds.materialize() as subsetDataIds: @@ -1823,14 +1837,15 @@ def testQueryResults(self): ) # Materialize the bias dataset queries, too, so now we're # materializing every single step. 
- with subsetDataIds.findDatasets( - bias, collections=["imported_r", "imported_g"], findFirst=False - ).materialize() as biases: - self.assertCountEqual(list(biases), expectedAllBiases) - with subsetDataIds.findDatasets( - bias, collections=["imported_r", "imported_g"], findFirst=True - ).materialize() as biases: - self.assertCountEqual(list(biases), expectedDeduplicatedBiases) + with self.assertWarns(FutureWarning): + with subsetDataIds.findDatasets( + bias, collections=["imported_r", "imported_g"], findFirst=False + ).materialize() as biases: + self.assertCountEqual(list(biases), expectedAllBiases) + with subsetDataIds.findDatasets( + bias, collections=["imported_r", "imported_g"], findFirst=True + ).materialize() as biases: + self.assertCountEqual(list(biases), expectedDeduplicatedBiases) def testStorageClassPropagation(self): """Test that queries for datasets respect the storage class passed in @@ -2700,7 +2715,8 @@ def testSkipCalibs(self): # regular expression will skip too if self.supportsCollectionRegex: pattern = re.compile(".*") - datasets = list(registry.queryDatasets("bias", collections=pattern)) + with self.assertWarns(FutureWarning): + datasets = list(registry.queryDatasets("bias", collections=pattern)) self.assertGreater(len(datasets), 0) # ellipsis should work as usual @@ -3364,7 +3380,10 @@ def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): dimensions = test.dimensions.split(",") if test.exception: with self.assertRaises(test.exception): - do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() + with ExitStack() as stack: + if test.exception == DataIdValueError: + stack.enter_context(self.assertWarns(FutureWarning)) + do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() else: query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) self.assertEqual(query.count(discard=True), test.count) @@ -3372,9 +3391,12 @@ def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): # and materialize if test.exception: with self.assertRaises(test.exception): - query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) - with query.materialize() as materialized: - materialized.count(discard=True) + with ExitStack() as stack: + if test.exception == DataIdValueError: + stack.enter_context(self.assertWarns(FutureWarning)) + query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) + with query.materialize() as materialized: + materialized.count(discard=True) else: query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) with query.materialize() as materialized: @@ -3413,10 +3435,8 @@ def do_query(element, datasets=None, collections=None): Test("visit", "-visit.name", (2, 1)), Test("visit", "day_obs,-visit.timespan.begin", (2, 1)), ] - if self.supportsQueryOffset: - test_data.append(Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2))) - for test in test_data: + def do_test(test: Test): order_by = test.order_by.split(",") query = do_query(test.element).order_by(*order_by) if test.limit is not None: @@ -3424,6 +3444,13 @@ def do_query(element, datasets=None, collections=None): dataIds = tuple(rec.id for rec in query) self.assertEqual(dataIds, test.result) + for test in test_data: + do_test(test) + + if self.supportsQueryOffset: + with self.assertWarns(FutureWarning): + do_test(Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2))) + # errors in a 
name for order_by in ("", "-"): with self.assertRaisesRegex( @@ -3482,23 +3509,31 @@ def testQueryDimensionRecordsExceptions(self): self.assertEqual(result.count(), 4) if self.supportsQueryGovernorValidation: - with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): - result = registry.queryDimensionRecords("detector", instrument="NotCam1") - result.count() - - with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): - result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) - result.count() - - with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): - result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") - result.count() - - with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): - result = registry.queryDimensionRecords( - "detector", where="instrument=instr", bind={"instr": "NotCam1"} - ) - result.count() + with self.assertWarns(FutureWarning): + with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): + result = registry.queryDimensionRecords("detector", instrument="NotCam1") + result.count() + + with self.assertWarns(FutureWarning): + with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): + result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) + result.count() + + with self.assertWarns(FutureWarning): + with self.assertRaisesRegex( + DataIdValueError, "Unknown values specified for governor dimension" + ): + result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") + result.count() + + with self.assertWarns(FutureWarning): + with self.assertRaisesRegex( + DataIdValueError, "Unknown values specified for governor dimension" + ): + result = registry.queryDimensionRecords( + "detector", where="instrument=instr", bind={"instr": "NotCam1"} + ) + result.count() def testDatasetConstrainedDimensionRecordQueries(self): """Test that queryDimensionRecords works even when given a dataset @@ -3697,17 +3732,18 @@ def test_skypix_constraint_queries(self) -> None: # New query system does not support non-common skypix constraints # and we are deprecating it in favor of region-based constraints. # TODO: Drop this test once we remove support for non-common skypix. - with contextlib.suppress(NotImplementedError): - self.assertEqual( - { - (data_id["tract"], data_id["patch"]) - for data_id in registry.queryDataIds( - ["patch"], - dataId={skypix_dimension.name: skypix_id}, - ) - }, - overlapping_patches, - ) + if self.supportsNonCommonSkypixQueries: + with self.assertWarns(FutureWarning): + self.assertEqual( + { + (data_id["tract"], data_id["patch"]) + for data_id in registry.queryDataIds( + ["patch"], + dataId={skypix_dimension.name: skypix_id}, + ) + }, + overlapping_patches, + ) # Test that a three-way join that includes the common skypix system in # the dimensions doesn't generate redundant join terms in the query.
# TODO: In the new query system this raises InvalidQueryError, change diff --git a/python/lsst/daf/butler/registry/wildcards.py b/python/lsst/daf/butler/registry/wildcards.py index 666bb72f6e..05bac58dee 100644 --- a/python/lsst/daf/butler/registry/wildcards.py +++ b/python/lsst/daf/butler/registry/wildcards.py @@ -35,6 +35,7 @@ import contextlib import dataclasses import re +import warnings from collections.abc import Callable, Iterable, Mapping from types import EllipsisType from typing import Any @@ -181,7 +182,9 @@ def fromExpression( # a local function so we can recurse after coercion. def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None: + was_string = False if isinstance(element, str): + was_string = True if defaultItemValue is not None: self.items.append((element, defaultItemValue)) return None @@ -197,6 +200,12 @@ def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None: self.strings.append(element) return None if allowPatterns and isinstance(element, re.Pattern): + if not was_string: + warnings.warn( + "Regular expressions should no longer be used in collection or dataset type searches." + " Use globs ('*' wildcards) instead. Will be removed after v28.", + FutureWarning, + ) self.patterns.append(element) return None if alreadyCoerced: diff --git a/python/lsst/daf/butler/remote_butler/_query_driver.py b/python/lsst/daf/butler/remote_butler/_query_driver.py index 8bc9097cbf..3a27d48b48 100644 --- a/python/lsst/daf/butler/remote_butler/_query_driver.py +++ b/python/lsst/daf/butler/remote_butler/_query_driver.py @@ -223,7 +223,7 @@ def explain_no_results(self, tree: QueryTree, execute: bool) -> Iterable[str]: return result.messages def get_default_collections(self) -> tuple[str, ...]: - collections = tuple(self._butler.collections) + collections = tuple(self._butler.collections.defaults) if not collections: raise NoDefaultCollectionError("No collections provided and no default collections.") return collections diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 6bd40571b0..bda66fc20a 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -584,11 +584,11 @@ def _normalize_collections(self, collections: CollectionArgType | None) -> Colle methods to a standardized format for the REST API. """ if collections is None: - if not self.collections: + if not self.collections.defaults: raise NoDefaultCollectionError( "No collections provided, and no defaults from butler construction." ) - collections = self.collections + collections = self.collections.defaults return convert_collection_arg_to_glob_string_list(collections) def clone( diff --git a/python/lsst/daf/butler/script/queryDataIds.py b/python/lsst/daf/butler/script/queryDataIds.py index 12f0e59407..b6226fb88d 100644 --- a/python/lsst/daf/butler/script/queryDataIds.py +++ b/python/lsst/daf/butler/script/queryDataIds.py @@ -139,7 +139,7 @@ def queryDataIds( `~lsst.daf.butler.Registry.queryDataIds`. """ if offset: - raise RuntimeError("Offset is no longer supported by the query system.") + raise NotImplementedError("--offset is no longer supported. 
It will be removed after v28.") butler = Butler.from_config(repo, without_datastore=True) diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py index c72895d14d..838fe7fe50 100644 --- a/python/lsst/daf/butler/script/queryDatasets.py +++ b/python/lsst/daf/butler/script/queryDatasets.py @@ -27,6 +27,7 @@ from __future__ import annotations import logging +import warnings from collections import defaultdict from collections.abc import Iterable, Iterator from typing import TYPE_CHECKING, Any @@ -185,6 +186,20 @@ def __init__( ): if (repo and butler) or (not repo and not butler): raise RuntimeError("One of repo and butler must be provided and the other must be None.") + collections = list(collections) + if not collections: + warnings.warn( + "No --collections specified. The --collections argument will become mandatory after v28.", + FutureWarning, + ) + glob = list(glob) + if not glob: + warnings.warn( + "No dataset types specified. Explicitly specifying dataset types will become mandatory" + " after v28. Specify '*' to match the current behavior of querying all dataset types.", + FutureWarning, + ) + # show_uri requires a datastore. without_datastore = not show_uri self.butler = butler or Butler.from_config(repo, without_datastore=without_datastore) diff --git a/python/lsst/daf/butler/script/queryDimensionRecords.py b/python/lsst/daf/butler/script/queryDimensionRecords.py index 54dc8249f8..272ef5563a 100644 --- a/python/lsst/daf/butler/script/queryDimensionRecords.py +++ b/python/lsst/daf/butler/script/queryDimensionRecords.py @@ -77,6 +77,9 @@ def queryDimensionRecords( `~lsst.daf.butler.Registry.queryDimensionRecords` except for ``no_check``, which is the inverse of ``check``. """ + if offset: + raise NotImplementedError("--offset is no longer supported. 
It will be removed after v28.") + butler = Butler.from_config(repo, without_datastore=True) with butler.query() as query: diff --git a/tests/test_butler.py b/tests/test_butler.py index e0f1142b2b..ac696dba0a 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -472,7 +472,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> But ) self.assertEqual(count, stop) - compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections) + compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections.defaults) assert compRef is not None summary = butler.get(compRef) self.assertEqual(summary, metric.summary) @@ -1235,7 +1235,7 @@ def testTransaction(self) -> None: with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): butler.get(datasetTypeName, dataId) # Also check explicitly if Dataset entry is missing - self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections)) + self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections.defaults)) # Direct retrieval should not find the file in the Datastore with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): butler.get(ref) diff --git a/tests/test_cliCmdQueryDatasets.py b/tests/test_cliCmdQueryDatasets.py index 44ded271d7..74fb5d958a 100644 --- a/tests/test_cliCmdQueryDatasets.py +++ b/tests/test_cliCmdQueryDatasets.py @@ -174,7 +174,7 @@ def testChained(self): self.repoDir, configFile=os.path.join(TESTDIR, "config/basic/butler-chained.yaml") ) - tables = self._queryDatasets(repo=testRepo.butler, show_uri=True) + tables = self._queryDatasets(repo=testRepo.butler, show_uri=True, collections="*", glob="*") # Want second datastore root since in-memory is ephemeral and # all the relevant datasets are stored in the second as well as third @@ -192,7 +192,7 @@ def testShowURI(self): """Test for expected output with show_uri=True.""" testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile) - tables = self._queryDatasets(repo=testRepo.butler, show_uri=True) + tables = self._queryDatasets(repo=testRepo.butler, show_uri=True, collections="*", glob="*") roots = testRepo.butler.get_datastore_roots() datastore_root = list(roots.values())[0] @@ -209,7 +209,7 @@ def testShowUriNoDisassembly(self): storageClassName="StructuredCompositeReadCompNoDisassembly", ) - tables = self._queryDatasets(repo=testRepo.butler, show_uri=True) + tables = self._queryDatasets(repo=testRepo.butler, show_uri=True, collections="*", glob="*") roots = testRepo.butler.get_datastore_roots() datastore_root = list(roots.values())[0] @@ -252,7 +252,7 @@ def testNoShowURI(self): """Test for expected output without show_uri (default is False).""" testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile) - tables = self._queryDatasets(repo=testRepo.butler) + tables = self._queryDatasets(repo=testRepo.butler, collections="*", glob="*") expectedTables = ( AstropyTable( @@ -274,7 +274,9 @@ def testWhere(self): """ testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile) - tables = self._queryDatasets(repo=testRepo.butler, where="instrument='DummyCamComp' AND visit=423") + tables = self._queryDatasets( + repo=testRepo.butler, where="instrument='DummyCamComp' AND visit=423", collections="*", glob="*" + ) expectedTables = ( AstropyTable( @@ -286,7 +288,7 @@ def testWhere(self): self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True) with 
self.assertRaises(RuntimeError): - self._queryDatasets(repo=testRepo.butler, collections="*", find_first=True) + self._queryDatasets(repo=testRepo.butler, collections="*", find_first=True, glob="*") def testGlobDatasetType(self): """Test specifying dataset type.""" @@ -311,7 +313,7 @@ def testGlobDatasetType(self): testRepo.addDataset(dataId={"instrument": "DummyCamComp", "visit": 425}, datasetType=datasetType) # verify the new dataset type increases the number of tables found: - tables = self._queryDatasets(repo=testRepo.butler) + tables = self._queryDatasets(repo=testRepo.butler, collections="*", glob="*") expectedTables = ( AstropyTable( @@ -331,13 +333,19 @@ def testGlobDatasetType(self): self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True) + # Dataset type (glob) argument will become mandatory soon + with self.assertWarns(FutureWarning): + self._queryDatasets(repo=testRepo.butler, collections="*") + def test_limit_order(self): """Test limit and ordering.""" # Create and register an additional DatasetType testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile) with self.assertLogs("lsst.daf.butler.script.queryDatasets", level="WARNING") as cm: - tables = self._queryDatasets(repo=testRepo.butler, limit=-1, order_by=("visit")) + tables = self._queryDatasets( + repo=testRepo.butler, limit=-1, order_by=("visit"), collections="*", glob="*" + ) self.assertIn("increase this limit", cm.output[0]) @@ -350,7 +358,9 @@ def test_limit_order(self): self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True) with self.assertLogs("lsst.daf.butler.script.queryDatasets", level="WARNING") as cm: - tables = self._queryDatasets(repo=testRepo.butler, limit=-1, order_by=("-visit")) + tables = self._queryDatasets( + repo=testRepo.butler, limit=-1, order_by=("-visit"), collections="*", glob="*" + ) self.assertIn("increase this limit", cm.output[0]) expectedTables = [ @@ -376,7 +386,7 @@ def testFindFirstAndCollections(self): testRepo.butler.collections.redefine_chain("chain", ["foo", "ingest/run"]) # Verify that without find-first, duplicate datasets are returned - tables = self._queryDatasets(repo=testRepo.butler, collections=["chain"], show_uri=True) + tables = self._queryDatasets(repo=testRepo.butler, collections=["chain"], show_uri=True, glob="*") # The test should be running with a single FileDatastore. roots = testRepo.butler.get_datastore_roots() @@ -519,7 +529,7 @@ def testFindFirstAndCollections(self): # Verify that with find first the duplicate dataset is eliminated and # the more recent dataset is returned. tables = self._queryDatasets( - repo=testRepo.butler, collections=["chain"], show_uri=True, find_first=True + repo=testRepo.butler, collections=["chain"], show_uri=True, find_first=True, glob="*" ) expectedTables = ( @@ -621,7 +631,13 @@ def testFindFirstAndCollections(self): # Verify that globs are not supported with find_first=True. 
with self.assertRaises(RuntimeError): - self._queryDatasets(repo=testRepo.butler, collections=["*"], show_uri=True, find_first=True) + self._queryDatasets( + repo=testRepo.butler, collections=["*"], show_uri=True, find_first=True, glob="*" + ) + + # Collections argument will become mandatory soon + with self.assertWarns(FutureWarning): + self._queryDatasets(repo=testRepo.butler, glob="*") if __name__ == "__main__": diff --git a/tests/test_query_relations.py b/tests/test_query_relations.py index b416dcfe38..af1299ffdc 100644 --- a/tests/test_query_relations.py +++ b/tests/test_query_relations.py @@ -244,6 +244,8 @@ def test_spatial_constraints(self) -> None: # could special-case skypix dimensions that are coarser than the common # dimension and part of the same system to simplify both the SQL query # and avoid post-query filtering, but we don't at present. + with self.assertWarns(FutureWarning): + relation_string = self.butler.registry.queryDataIds(["patch"], htm11=self.htm11) self.assert_relation_str( f""" Π[patch, skymap, tract]( @@ -265,7 +267,7 @@ def test_spatial_constraints(self) -> None: ) ) """, - self.butler.registry.queryDataIds(["patch"], htm11=self.htm11), + relation_string, ) # Constrain a regular spatial dimension (patch) from the common # skypix dimension. This does not require post-query filtering. @@ -286,6 +288,10 @@ def test_spatial_constraints(self) -> None: # and also constrain via a skypix dimension other than the common one. # Once again we could special-case this for skypix dimensions that are # coarser than the common dimension in the same syste, but we don't. + with self.assertWarns(FutureWarning): + relation_string = self.butler.registry.queryDataIds( + ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11 + ) self.assert_relation_str( # This query also doesn't need visit or physical_filter joined in, # but we can live with that. @@ -319,9 +325,7 @@ def test_spatial_constraints(self) -> None: ) ) """, - self.butler.registry.queryDataIds( - ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11 - ), + relation_string, ) # Constrain a regular dimension (detector) via a different dimension # (visit) that combine together to define a more fine-grained region, diff --git a/tests/test_remote_butler.py b/tests/test_remote_butler.py index 8b53927257..e1ed3522c2 100644 --- a/tests/test_remote_butler.py +++ b/tests/test_remote_butler.py @@ -150,6 +150,7 @@ class RemoteButlerRegistryTests(RegistryTests): supportsDetailedQueryExplain = False supportsQueryOffset = False supportsQueryGovernorValidation = False + supportsNonCommonSkypixQueries = False # Jim decided to drop these expressions from the new query system -- they # can be written less ambiguously by writing e.g. 
``time < diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py index 68d1a484e3..6de7b81dfb 100644 --- a/tests/test_simpleButler.py +++ b/tests/test_simpleButler.py @@ -725,18 +725,21 @@ def testWildcardQueries(self): ("*oll*", {"collection", "coll3"}), ("*[0-9]", {"coll3"}), ] - if self.supportsCollectionRegex: - expressions.extend( - [ - (re.compile("u.*"), {"u/user/test"}), - (re.compile(".*oll.*"), {"collection", "coll3"}), - ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}), - ] - ) for expression, expected in expressions: result = butler.registry.queryCollections(expression) self.assertEqual(set(result), expected) + if self.supportsCollectionRegex: + expressions = [ + (re.compile("u.*"), {"u/user/test"}), + (re.compile(".*oll.*"), {"collection", "coll3"}), + ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}), + ] + for expression, expected in expressions: + with self.assertWarns(FutureWarning): + result = butler.registry.queryCollections(expression) + self.assertEqual(set(result), expected) + def test_skypix_templates(self): """Test that skypix templates can work.""" # Dimension Records
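
Migration sketch (not part of the patch above): the snippet below illustrates the replacement APIs this change steers callers toward. It is a minimal, hedged example — the repository path "my_repo" is a placeholder — but `Butler.from_config`, `Registry.queryCollections`, and `Butler.collections.defaults` are exactly the calls exercised in the diff.

    from lsst.daf.butler import Butler

    # Placeholder repo path; substitute a real butler repository.
    butler = Butler.from_config("my_repo")

    # Deprecated: regex patterns in collection searches (now emit FutureWarning).
    #   import re
    #   butler.registry.queryCollections(re.compile("imported_."))
    # Supported replacement: shell-like globs.
    collections = butler.registry.queryCollections("imported_*")

    # Deprecated: indexing Butler.collections for the default collections.
    #   first_default = butler.collections[0]
    # Supported replacement: the explicit defaults sequence.
    defaults = butler.collections.defaults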