Merge pull request #1045 from lsst/tickets/DM-45556
DM-45556: Switch command-line tools to new query interface
timj authored Aug 7, 2024
2 parents a63955a + b7cc26f commit 427f673
Showing 14 changed files with 231 additions and 133 deletions.
2 changes: 2 additions & 0 deletions doc/changes/DM-45556.misc.rst
@@ -0,0 +1,2 @@
The command-line tools have been modified to use the new query system and interface.
The only user-visible changes are that the ``--no-check`` and ``--offset`` options are no longer supported by the new system: ``--no-check`` is now ignored with a warning and ``--offset`` is rejected with an error.
3 changes: 0 additions & 3 deletions pyproject.toml
@@ -134,9 +134,6 @@ line_length = 110
write_to = "python/lsst/daf/butler/version.py"

[tool.pytest.ini_options]
# The matplotlib test may not release font files.
# Some unit tests open registry database in setUpClass.
open_files_ignore = ["*.ttf", "gen3.sqlite3"]
# These require additional environment setup that isn't available during
# normal unit test runs
addopts = "--ignore=tests_integration"
3 changes: 3 additions & 0 deletions python/lsst/daf/butler/cli/cmd/commands.py
@@ -568,6 +568,7 @@ def query_data_ids(**kwargs: Any) -> None:
@click.option(
"--no-check",
is_flag=True,
default=None,
help=unwrap(
"""Don't check the query before execution. By default the query is checked before it
executed, this may reject some valid queries that resemble common mistakes."""
@@ -576,6 +577,8 @@ def query_data_ids(**kwargs: Any) -> None:
@options_file_option()
def query_dimension_records(**kwargs: Any) -> None:
"""Query for dimension information."""
if kwargs.pop("no_check") is not None:
click.echo("WARNING: --no-check option now has no effect and is ignored.")
table = script.queryDimensionRecords(**kwargs)
if table:
table.pprint_all()
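The commands.py change above keeps the deprecated ``--no-check`` flag but gives it a default of None, so an explicit use can be detected and turned into a warning rather than an error. A minimal standalone sketch of that pattern, assuming only click is installed (the demo command and its help text are illustrative, not part of daf_butler):

import click


@click.command()
@click.option("--no-check", is_flag=True, default=None, help="Deprecated; has no effect.")
def demo(no_check: bool | None) -> None:
    # default=None lets us distinguish "flag omitted" (None) from "flag passed" (True).
    if no_check is not None:
        click.echo("WARNING: --no-check option now has no effect and is ignored.")
    click.echo("running query")


if __name__ == "__main__":
    demo()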
75 changes: 66 additions & 9 deletions python/lsst/daf/butler/cli/utils.py
@@ -62,6 +62,7 @@
import sys
import textwrap
import traceback
import types
import uuid
import warnings
from collections import Counter
@@ -158,6 +159,60 @@ def textTypeStr(multiple: bool) -> str:
return typeStrAcceptsMultiple if multiple else typeStrAcceptsSingle


class ClickExitFailedNicely:
"""Exit a Click command that failed.
This class is used to control the behavior when a click command has failed.
By default the exception will be logged and a non-zero exit status will
be used.
Parameters
----------
exc_type : `type`
The type of the exception.
exc_value : `Exception`
The `Exception` object.
exc_tb : `types.TracebackType`
The traceback for this exception.
"""

use_bad_status: bool = True
"""Control how a bad command is exited. `True` indicates bad status and
`False` indicates a `click.ClickException`."""

def __init__(self, exc_type: type[BaseException], exc_value: BaseException, exc_tb: types.TracebackType):
self.exc_type = exc_type
self.exc_value = exc_value
self.exc_tb = self._clean_tb(exc_tb)

def _clean_tb(self, exc_tb: types.TracebackType) -> types.TracebackType:
if exc_tb.tb_next:
# Do not show the decorator in traceback.
exc_tb = exc_tb.tb_next
return exc_tb

def exit_click(self) -> None:
if self.use_bad_status:
self.exit_click_command_bad_status()
else:
self.exit_click_command_click_exception()

def exit_click_command_bad_status(self) -> None:
"""Exit a click command with bad exit status and report log message."""
log.exception(
"Caught an exception, details are in traceback:",
exc_info=(self.exc_type, self.exc_value, self.exc_tb),
)
# Tell click to stop, this never returns.
click.get_current_context().exit(1)

def exit_click_command_click_exception(self) -> None:
"""Exit a click command raising ClickException."""
tb = traceback.format_tb(self.exc_tb)
errmsg = "".join(tb) + str(self.exc_value)
raise click.ClickException(errmsg)


class LogCliRunner(click.testing.CliRunner):
"""A test runner to use when the logging system will be initialized by code
under test, calls CliLog.resetLog(), which undoes any logging setup that
@@ -169,8 +224,15 @@ class LogCliRunner(click.testing.CliRunner):
"""

def invoke(self, *args: Any, **kwargs: Any) -> click.testing.Result:
result = super().invoke(*args, **kwargs)
# We want exceptions to be reported to the test runner rather than
# being converted to a simple exit status. The default is to
# use a logger but the click test infrastructure doesn't capture that
# in result.
with patch.object(ClickExitFailedNicely, "use_bad_status", False):
result = super().invoke(*args, **kwargs)
CliLog.resetLog()
if result.exception:
print("Failing command was: ", args)
return result


@@ -857,6 +919,7 @@ class MWCommand(click.Command):
Keyword arguments for `click.Command`.
"""

name = "butler"
extra_epilog: str | None = None

def __init__(self, *args: Any, **kwargs: Any) -> None:
@@ -1218,13 +1281,7 @@ def inner(*args: Any, **kwargs: Any) -> None:
assert exc_type is not None
assert exc_value is not None
assert exc_tb is not None
if exc_tb.tb_next:
# do not show this decorator in traceback
exc_tb = exc_tb.tb_next
log.exception(
"Caught an exception, details are in traceback:", exc_info=(exc_type, exc_value, exc_tb)
)
# tell click to stop, this never returns.
click.get_current_context().exit(1)
exit_hdl = ClickExitFailedNicely(exc_type, exc_value, exc_tb)
exit_hdl.exit_click()

return inner
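The refactoring above moves the old inline log.exception / exit(1) handling into ClickExitFailedNicely so that tests can switch it to raise a click.ClickException instead, which is what LogCliRunner.invoke now does via patch.object. A minimal sketch of both sides, assuming this branch of lsst.daf.butler and click are installed; the failing demo command is illustrative:

import sys
from unittest.mock import patch

import click
from click.testing import CliRunner

from lsst.daf.butler.cli.utils import ClickExitFailedNicely


@click.command()
def demo() -> None:
    try:
        raise RuntimeError("something went wrong")
    except Exception:
        exc_type, exc_value, exc_tb = sys.exc_info()
        assert exc_type is not None and exc_value is not None and exc_tb is not None
        # Default behavior: log the traceback and exit with a non-zero status.
        ClickExitFailedNicely(exc_type, exc_value, exc_tb).exit_click()


# In tests, patch use_bad_status so the failure surfaces as a ClickException
# that the test runner can report instead of a silent bad exit status.
with patch.object(ClickExitFailedNicely, "use_bad_status", False):
    result = CliRunner().invoke(demo)
print(result.output)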
7 changes: 6 additions & 1 deletion python/lsst/daf/butler/queries/_query.py
@@ -35,7 +35,7 @@
from lsst.utils.iteration import ensure_iterable

from .._dataset_type import DatasetType
from .._exceptions import InvalidQueryError
from .._exceptions import DimensionNameError, InvalidQueryError
from .._storage_class import StorageClassFactory
from ..dimensions import DataCoordinate, DataId, DataIdValue, DimensionGroup
from ..registry import DatasetTypeError
@@ -281,6 +281,11 @@ def dimension_records(self, element: str) -> DimensionRecordQueryResults:
records : `.queries.DimensionRecordQueryResults`
Data IDs matching the given query parameters.
"""
if element not in self._driver.universe:
# Prefer an explicit exception over a KeyError below.
raise DimensionNameError(
f"No such dimension '{element}', available dimensions: " + str(self._driver.universe.elements)
)
tree = self._tree
if element not in tree.dimensions.elements:
tree = tree.join_dimensions(self._driver.universe[element].minimal_group)
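With the added check, an unrecognized element name now fails up front with DimensionNameError, listing the valid elements, rather than surfacing as a KeyError from the universe lookup. A minimal sketch, assuming this branch of lsst.daf.butler and an existing repository; REPO_PATH is a placeholder:

from lsst.daf.butler import Butler
from lsst.daf.butler._exceptions import DimensionNameError

butler = Butler.from_config("REPO_PATH")  # placeholder repository path
with butler._query() as query:
    try:
        query.dimension_records("not_a_dimension")
    except DimensionNameError as err:
        # e.g. "No such dimension 'not_a_dimension', available dimensions: ..."
        print(err)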
11 changes: 8 additions & 3 deletions python/lsst/daf/butler/script/certifyCalibrations.py
@@ -78,8 +78,13 @@ def certifyCalibrations(
if not search_all_inputs and registry.getCollectionType(input_collection) is CollectionType.CHAINED:
input_collection = next(iter(registry.getCollectionChain(input_collection)))

refs = set(registry.queryDatasets(dataset_type_name, collections=[input_collection]))
if not refs:
raise RuntimeError(f"No inputs found for dataset {dataset_type_name} in {input_collection}.")
with butler._query() as query:
results = query.datasets(dataset_type_name, collections=input_collection)
refs = set(results)
if not refs:
explanation = "\n".join(results.explain_no_results())
raise RuntimeError(
f"No inputs found for dataset {dataset_type_name} in {input_collection}. {explanation}"
)
registry.registerCollection(output_collection, type=CollectionType.CALIBRATION)
registry.certify(output_collection, refs, timespan)
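The certify script now runs its dataset search through the new query context manager and, when nothing matches, asks the result object to explain why. A minimal sketch of that pattern, assuming this branch of lsst.daf.butler; the repository path, dataset type, and collection names are placeholders:

from lsst.daf.butler import Butler

butler = Butler.from_config("REPO_PATH")
with butler._query() as query:
    results = query.datasets("DATASET_TYPE", collections="INPUT_COLLECTION")
    refs = set(results)
    if not refs:
        # explain_no_results() returns human-readable reasons, e.g. an empty collection.
        explanation = "\n".join(results.explain_no_results())
        raise RuntimeError(f"No inputs found for dataset DATASET_TYPE in INPUT_COLLECTION. {explanation}")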
50 changes: 22 additions & 28 deletions python/lsst/daf/butler/script/exportCalibs.py
@@ -29,6 +29,7 @@
import logging
import os
from collections.abc import Iterable
from operator import attrgetter
from typing import TYPE_CHECKING

from astropy.table import Table
@@ -37,20 +38,20 @@
from ..registry import CollectionType

if TYPE_CHECKING:
from lsst.daf.butler import DatasetRef, DatasetType, Registry
from lsst.daf.butler import DatasetRef, DatasetType

log = logging.getLogger(__name__)


def parseCalibrationCollection(
registry: Registry, collection: str, datasetTypes: Iterable[DatasetType]
) -> tuple[list[str], list[DatasetRef]]:
def find_calibration_datasets(
butler: Butler, collection: str, datasetTypes: Iterable[DatasetType]
) -> list[DatasetRef]:
"""Search a calibration collection for calibration datasets.
Parameters
----------
registry : `lsst.daf.butler.Registry`
Butler registry to use.
butler : `lsst.daf.butler.Butler`
Butler to use.
collection : `str`
Collection to search. This should be a CALIBRATION
collection.
@@ -59,8 +60,6 @@ def parseCalibrationCollection(
Returns
-------
exportCollections : `list` [`str`]
List of collections to save on export.
exportDatasets : `list` [`lsst.daf.butler.DatasetRef`]
Datasets to save on export.
@@ -69,24 +68,22 @@
RuntimeError
Raised if the collection to search is not a CALIBRATION collection.
"""
if registry.getCollectionType(collection) != CollectionType.CALIBRATION:
if butler.registry.getCollectionType(collection) != CollectionType.CALIBRATION:
raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")

exportCollections = []
exportDatasets = []
for calibType in datasetTypes:
associations = registry.queryDatasetAssociations(
calibType, collections=collection, collectionTypes=[CollectionType.CALIBRATION]
)
for result in associations:
# Need an expanded dataId in case file templates will be used
# in the transfer.
dataId = registry.expandDataId(result.ref.dataId)
ref = result.ref.expanded(dataId)
exportDatasets.append(ref)
assert ref.run is not None, "These refs must all be resolved."
exportCollections.append(ref.run)
return exportCollections, exportDatasets
with butler._query() as query:
results = query.datasets(calibType, collections=collection, find_first=False)

try:
refs = list(results.with_dimension_records())
except Exception as e:
e.add_note(f"Error from querying dataset type {calibType} and collection {collection}")
raise
exportDatasets.extend(refs)

return exportDatasets


def exportCalibs(
@@ -148,10 +145,7 @@ def exportCalibs(
collectionsToExport.append(collection)
collectionType = butler.registry.getCollectionType(collection)
if collectionType == CollectionType.CALIBRATION:
exportCollections, exportDatasets = parseCalibrationCollection(
butler.registry, collection, calibTypes
)
collectionsToExport.extend(exportCollections)
exportDatasets = find_calibration_datasets(butler, collection, calibTypes)
datasetsToExport.extend(exportDatasets)

if os.path.exists(directory):
@@ -170,14 +164,14 @@ def exportCalibs(
log.info("Saving %d dataset(s)", len(datasetsToExport))
export.saveDatasets(datasetsToExport)

sortedDatasets = sorted(datasetsToExport, key=lambda x: x.datasetType.name)
sortedDatasets = sorted(datasetsToExport, key=attrgetter("datasetType.name", "dataId"))

requiredDimensions: set[str] = set()
for ref in sortedDatasets:
requiredDimensions.update(ref.dimensions.names)
dimensionColumns = {
dimensionName: [ref.dataId.get(dimensionName, "") for ref in sortedDatasets]
for dimensionName in requiredDimensions
for dimensionName in sorted(requiredDimensions)
}

return Table(
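find_calibration_datasets replaces the old registry.queryDatasetAssociations loop: find_first=False returns every dataset in the calibration collection, with_dimension_records() supplies the expanded data IDs that file templates need, and add_note() (Python 3.11+) attaches the failing dataset type and collection to any error. A minimal sketch of the same flow, assuming this branch of lsst.daf.butler; the dataset type "bias", the collection name, and the repository path are placeholders:

from operator import attrgetter

from lsst.daf.butler import Butler

butler = Butler.from_config("REPO_PATH")
with butler._query() as query:
    results = query.datasets("bias", collections="CALIB_COLLECTION", find_first=False)
    try:
        refs = list(results.with_dimension_records())
    except Exception as e:
        e.add_note("Error from querying dataset type 'bias' and collection CALIB_COLLECTION")
        raise

# The export table is now sorted by dataset type name and then data ID for stable output.
for ref in sorted(refs, key=attrgetter("datasetType.name", "dataId")):
    print(ref.datasetType.name, ref.dataId, ref.run)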
62 changes: 38 additions & 24 deletions python/lsst/daf/butler/script/queryDataIds.py
@@ -28,7 +28,6 @@

import logging
from collections.abc import Iterable
from types import EllipsisType
from typing import TYPE_CHECKING

import numpy as np
@@ -131,14 +130,20 @@ def queryDataIds(
Docstring for supported parameters is the same as
`~lsst.daf.butler.Registry.queryDataIds`.
"""
if offset:
raise RuntimeError("Offset is no longer supported by the query system.")

butler = Butler.from_config(repo, without_datastore=True)

dataset_types = []
if datasets:
dataset_types = list(butler.registry.queryDatasetTypes(datasets))

if datasets and collections and not dimensions:
# Determine the dimensions relevant to all given dataset types.
# Since we are going to AND together all dimensions, we can not
# seed the result with an empty set.
dataset_type_dimensions: DimensionGroup | None = None
dataset_types = list(butler.registry.queryDatasetTypes(datasets))
for dataset_type in dataset_types:
if dataset_type_dimensions is None:
# Seed with dimensions of first dataset type.
@@ -160,26 +165,35 @@
dimensions = set(dataset_type_dimensions.names)
_LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)

query_collections: Iterable[str] | EllipsisType | None = None
if datasets:
query_collections = collections or ...
results = butler.registry.queryDataIds(
dimensions, datasets=datasets, where=where, collections=query_collections
)

if order_by:
results = results.order_by(*order_by)
if limit > 0:
new_offset = offset if offset > 0 else None
results = results.limit(limit, new_offset)

if results.any(exact=False):
if results.dimensions:
table = _Table(results)
if not table.dataIds:
return None, "Post-query region filtering removed all rows, since nothing overlapped."
return table.getAstropyTable(not order_by), None
with butler._query() as query:
if datasets:
# Need to constrain results based on dataset type and collection.
query_collections = collections or ...

expanded_collections = butler.registry.queryCollections(query_collections)

sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections)
for dt in dataset_types:
sub_query = sub_query.join_dataset_search(dt, collections=expanded_collections)

results = sub_query.data_ids(dimensions)
else:
results = query.data_ids(dimensions)

if where:
results = results.where(where)
if order_by:
results = results.order_by(*order_by)
if limit > 0:
results = results.limit(limit)

if results.any(exact=False):
if results.dimensions:
table = _Table(results)
if not table.dataIds:
return None, "Post-query region filtering removed all rows, since nothing overlapped."
return table.getAstropyTable(not order_by), None
else:
return None, "Result has one logical row but no columns because no dimensions were requested."
else:
return None, "Result has one logical row but no columns because no dimensions were requested."
else:
return None, "\n".join(results.explain_no_results())
return None, "\n".join(results.explain_no_results())
