Merge pull request #1045 from lsst/tickets/DM-45556
DM-45556: Switch command-line tools to new query interface
timj authored Aug 7, 2024
2 parents a63955a + b7cc26f commit 427f673
Showing 14 changed files with 231 additions and 133 deletions.
2 changes: 2 additions & 0 deletions doc/changes/DM-45556.misc.rst
@@ -0,0 +1,2 @@
The command-line tools have been modified to use the new query system and interface.
The only user-visible changes are that the ``--no-check`` and ``--offset`` options are no longer supported by the new system: ``--no-check`` is now ignored with a warning and ``--offset`` is rejected with an error.
3 changes: 0 additions & 3 deletions pyproject.toml
@@ -134,9 +134,6 @@ line_length = 110
write_to = "python/lsst/daf/butler/version.py"

[tool.pytest.ini_options]
# The matplotlib test may not release font files.
# Some unit tests open registry database in setUpClass.
open_files_ignore = ["*.ttf", "gen3.sqlite3"]
# These require additional environment setup that isn't available during
# normal unit test runs
addopts = "--ignore=tests_integration"
3 changes: 3 additions & 0 deletions python/lsst/daf/butler/cli/cmd/commands.py
@@ -568,6 +568,7 @@ def query_data_ids(**kwargs: Any) -> None:
@click.option(
"--no-check",
is_flag=True,
default=None,
help=unwrap(
"""Don't check the query before execution. By default the query is checked before it
executed, this may reject some valid queries that resemble common mistakes."""
@@ -576,6 +577,8 @@ def query_data_ids(**kwargs: Any) -> None:
@options_file_option()
def query_dimension_records(**kwargs: Any) -> None:
"""Query for dimension information."""
if kwargs.pop("no_check") is not None:
click.echo("WARNING: --no-check option now has no effect and is ignored.")
table = script.queryDimensionRecords(**kwargs)
if table:
table.pprint_all()
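The commands.py change above keeps the deprecated ``--no-check`` flag but gives it a default of None, so an explicit use can be detected and turned into a warning rather than an error. A minimal standalone sketch of that pattern, assuming only click is installed (the demo command and its help text are illustrative, not part of daf_butler):

import click


@click.command()
@click.option("--no-check", is_flag=True, default=None, help="Deprecated; has no effect.")
def demo(no_check: bool | None) -> None:
    # default=None lets us distinguish "flag omitted" (None) from "flag passed" (True).
    if no_check is not None:
        click.echo("WARNING: --no-check option now has no effect and is ignored.")
    click.echo("running query")


if __name__ == "__main__":
    demo()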
75 changes: 66 additions & 9 deletions python/lsst/daf/butler/cli/utils.py
@@ -62,6 +62,7 @@
import sys
import textwrap
import traceback
import types
import uuid
import warnings
from collections import Counter
@@ -158,6 +159,60 @@ def textTypeStr(multiple: bool) -> str:
return typeStrAcceptsMultiple if multiple else typeStrAcceptsSingle


class ClickExitFailedNicely:
"""Exit a Click command that failed.
This class is used to control the behavior when a click command has failed.
By default the exception will be logged and a non-zero exit status will
be used.
Parameters
----------
exc_type : `type`
The type of the exception.
exc_value : `Exception`
The `Exception` object.
exc_tb : `types.TracebackType`
The traceback for this exception.
"""

use_bad_status: bool = True
"""Control how a bad command is exited. `True` indicates bad status and
`False` indicates a `click.ClickException`."""

def __init__(self, exc_type: type[BaseException], exc_value: BaseException, exc_tb: types.TracebackType):
self.exc_type = exc_type
self.exc_value = exc_value
self.exc_tb = self._clean_tb(exc_tb)

def _clean_tb(self, exc_tb: types.TracebackType) -> types.TracebackType:
if exc_tb.tb_next:
# Do not show the decorator in traceback.
exc_tb = exc_tb.tb_next
return exc_tb

def exit_click(self) -> None:
if self.use_bad_status:
self.exit_click_command_bad_status()
else:
self.exit_click_command_click_exception()

def exit_click_command_bad_status(self) -> None:
"""Exit a click command with bad exit status and report log message."""
log.exception(
"Caught an exception, details are in traceback:",
exc_info=(self.exc_type, self.exc_value, self.exc_tb),
)
# Tell click to stop, this never returns.
click.get_current_context().exit(1)

def exit_click_command_click_exception(self) -> None:
"""Exit a click command raising ClickException."""
tb = traceback.format_tb(self.exc_tb)
errmsg = "".join(tb) + str(self.exc_value)
raise click.ClickException(errmsg)


class LogCliRunner(click.testing.CliRunner):
"""A test runner to use when the logging system will be initialized by code
under test, calls CliLog.resetLog(), which undoes any logging setup that
@@ -169,8 +224,15 @@ class LogCliRunner(click.testing.CliRunner):
"""

def invoke(self, *args: Any, **kwargs: Any) -> click.testing.Result:
result = super().invoke(*args, **kwargs)
# We want exceptions to be reported to the test runner rather than
# being converted to a simple exit status. The default is to
# use a logger but the click test infrastructure doesn't capture that
# in result.
with patch.object(ClickExitFailedNicely, "use_bad_status", False):
result = super().invoke(*args, **kwargs)
CliLog.resetLog()
if result.exception:
print("Failing command was: ", args)
return result


@@ -857,6 +919,7 @@ class MWCommand(click.Command):
Keyword arguments for `click.Command`.
"""

name = "butler"
extra_epilog: str | None = None

def __init__(self, *args: Any, **kwargs: Any) -> None:
@@ -1218,13 +1281,7 @@ def inner(*args: Any, **kwargs: Any) -> None:
assert exc_type is not None
assert exc_value is not None
assert exc_tb is not None
if exc_tb.tb_next:
# do not show this decorator in traceback
exc_tb = exc_tb.tb_next
log.exception(
"Caught an exception, details are in traceback:", exc_info=(exc_type, exc_value, exc_tb)
)
# tell click to stop, this never returns.
click.get_current_context().exit(1)
exit_hdl = ClickExitFailedNicely(exc_type, exc_value, exc_tb)
exit_hdl.exit_click()

return inner
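The refactoring above moves the old inline log.exception / exit(1) handling into ClickExitFailedNicely so that tests can switch it to raise a click.ClickException instead, which is what LogCliRunner.invoke now does via patch.object. A minimal sketch of both sides, assuming this branch of lsst.daf.butler and click are installed; the failing demo command is illustrative:

import sys
from unittest.mock import patch

import click
from click.testing import CliRunner

from lsst.daf.butler.cli.utils import ClickExitFailedNicely


@click.command()
def demo() -> None:
    try:
        raise RuntimeError("something went wrong")
    except Exception:
        exc_type, exc_value, exc_tb = sys.exc_info()
        assert exc_type is not None and exc_value is not None and exc_tb is not None
        # Default behavior: log the traceback and exit with a non-zero status.
        ClickExitFailedNicely(exc_type, exc_value, exc_tb).exit_click()


# In tests, patch use_bad_status so the failure surfaces as a ClickException
# that the test runner can report instead of a silent bad exit status.
with patch.object(ClickExitFailedNicely, "use_bad_status", False):
    result = CliRunner().invoke(demo)
print(result.output)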
7 changes: 6 additions & 1 deletion python/lsst/daf/butler/queries/_query.py
@@ -35,7 +35,7 @@
from lsst.utils.iteration import ensure_iterable

from .._dataset_type import DatasetType
from .._exceptions import InvalidQueryError
from .._exceptions import DimensionNameError, InvalidQueryError
from .._storage_class import StorageClassFactory
from ..dimensions import DataCoordinate, DataId, DataIdValue, DimensionGroup
from ..registry import DatasetTypeError
@@ -281,6 +281,11 @@ def dimension_records(self, element: str) -> DimensionRecordQueryResults:
records : `.queries.DimensionRecordQueryResults`
Data IDs matching the given query parameters.
"""
if element not in self._driver.universe:
# Prefer an explicit exception over a KeyError below.
raise DimensionNameError(
f"No such dimension '{element}', available dimensions: " + str(self._driver.universe.elements)
)
tree = self._tree
if element not in tree.dimensions.elements:
tree = tree.join_dimensions(self._driver.universe[element].minimal_group)
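With the added check, an unrecognized element name now fails up front with DimensionNameError, listing the valid elements, rather than surfacing as a KeyError from the universe lookup. A minimal sketch, assuming this branch of lsst.daf.butler and an existing repository; REPO_PATH is a placeholder:

from lsst.daf.butler import Butler
from lsst.daf.butler._exceptions import DimensionNameError

butler = Butler.from_config("REPO_PATH")  # placeholder repository path
with butler._query() as query:
    try:
        query.dimension_records("not_a_dimension")
    except DimensionNameError as err:
        # e.g. "No such dimension 'not_a_dimension', available dimensions: ..."
        print(err)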
11 changes: 8 additions & 3 deletions python/lsst/daf/butler/script/certifyCalibrations.py
@@ -78,8 +78,13 @@ def certifyCalibrations(
if not search_all_inputs and registry.getCollectionType(input_collection) is CollectionType.CHAINED:
input_collection = next(iter(registry.getCollectionChain(input_collection)))

refs = set(registry.queryDatasets(dataset_type_name, collections=[input_collection]))
if not refs:
raise RuntimeError(f"No inputs found for dataset {dataset_type_name} in {input_collection}.")
with butler._query() as query:
results = query.datasets(dataset_type_name, collections=input_collection)
refs = set(results)
if not refs:
explanation = "\n".join(results.explain_no_results())
raise RuntimeError(
f"No inputs found for dataset {dataset_type_name} in {input_collection}. {explanation}"
)
registry.registerCollection(output_collection, type=CollectionType.CALIBRATION)
registry.certify(output_collection, refs, timespan)
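The certify script now runs its dataset search through the new query context manager and, when nothing matches, asks the result object to explain why. A minimal sketch of that pattern, assuming this branch of lsst.daf.butler; the repository path, dataset type, and collection names are placeholders:

from lsst.daf.butler import Butler

butler = Butler.from_config("REPO_PATH")
with butler._query() as query:
    results = query.datasets("DATASET_TYPE", collections="INPUT_COLLECTION")
    refs = set(results)
    if not refs:
        # explain_no_results() returns human-readable reasons, e.g. an empty collection.
        explanation = "\n".join(results.explain_no_results())
        raise RuntimeError(f"No inputs found for dataset DATASET_TYPE in INPUT_COLLECTION. {explanation}")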
50 changes: 22 additions & 28 deletions python/lsst/daf/butler/script/exportCalibs.py
@@ -29,6 +29,7 @@
import logging
import os
from collections.abc import Iterable
from operator import attrgetter
from typing import TYPE_CHECKING

from astropy.table import Table
@@ -37,20 +38,20 @@
from ..registry import CollectionType

if TYPE_CHECKING:
from lsst.daf.butler import DatasetRef, DatasetType, Registry
from lsst.daf.butler import DatasetRef, DatasetType

log = logging.getLogger(__name__)


def parseCalibrationCollection(
registry: Registry, collection: str, datasetTypes: Iterable[DatasetType]
) -> tuple[list[str], list[DatasetRef]]:
def find_calibration_datasets(
butler: Butler, collection: str, datasetTypes: Iterable[DatasetType]
) -> list[DatasetRef]:
"""Search a calibration collection for calibration datasets.
Parameters
----------
registry : `lsst.daf.butler.Registry`
Butler registry to use.
butler : `lsst.daf.butler.Butler`
Butler to use.
collection : `str`
Collection to search. This should be a CALIBRATION
collection.
@@ -59,8 +60,6 @@ def parseCalibrationCollection(
Returns
-------
exportCollections : `list` [`str`]
List of collections to save on export.
exportDatasets : `list` [`lsst.daf.butler.DatasetRef`]
Datasets to save on export.
@@ -69,24 +68,22 @@
RuntimeError
Raised if the collection to search is not a CALIBRATION collection.
"""
if registry.getCollectionType(collection) != CollectionType.CALIBRATION:
if butler.registry.getCollectionType(collection) != CollectionType.CALIBRATION:
raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")

exportCollections = []
exportDatasets = []
for calibType in datasetTypes:
associations = registry.queryDatasetAssociations(
calibType, collections=collection, collectionTypes=[CollectionType.CALIBRATION]
)
for result in associations:
# Need an expanded dataId in case file templates will be used
# in the transfer.
dataId = registry.expandDataId(result.ref.dataId)
ref = result.ref.expanded(dataId)
exportDatasets.append(ref)
assert ref.run is not None, "These refs must all be resolved."
exportCollections.append(ref.run)
return exportCollections, exportDatasets
with butler._query() as query:
results = query.datasets(calibType, collections=collection, find_first=False)

try:
refs = list(results.with_dimension_records())
except Exception as e:
e.add_note(f"Error from querying dataset type {calibType} and collection {collection}")
raise
exportDatasets.extend(refs)

return exportDatasets


def exportCalibs(
@@ -148,10 +145,7 @@ def exportCalibs(
collectionsToExport.append(collection)
collectionType = butler.registry.getCollectionType(collection)
if collectionType == CollectionType.CALIBRATION:
exportCollections, exportDatasets = parseCalibrationCollection(
butler.registry, collection, calibTypes
)
collectionsToExport.extend(exportCollections)
exportDatasets = find_calibration_datasets(butler, collection, calibTypes)
datasetsToExport.extend(exportDatasets)

if os.path.exists(directory):
@@ -170,14 +164,14 @@ def exportCalibs(
log.info("Saving %d dataset(s)", len(datasetsToExport))
export.saveDatasets(datasetsToExport)

sortedDatasets = sorted(datasetsToExport, key=lambda x: x.datasetType.name)
sortedDatasets = sorted(datasetsToExport, key=attrgetter("datasetType.name", "dataId"))

requiredDimensions: set[str] = set()
for ref in sortedDatasets:
requiredDimensions.update(ref.dimensions.names)
dimensionColumns = {
dimensionName: [ref.dataId.get(dimensionName, "") for ref in sortedDatasets]
for dimensionName in requiredDimensions
for dimensionName in sorted(requiredDimensions)
}

return Table(
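find_calibration_datasets replaces the old registry.queryDatasetAssociations loop: find_first=False returns every dataset in the calibration collection, with_dimension_records() supplies the expanded data IDs that file templates need, and add_note() (Python 3.11+) attaches the failing dataset type and collection to any error. A minimal sketch of the same flow, assuming this branch of lsst.daf.butler; the dataset type "bias", the collection name, and the repository path are placeholders:

from operator import attrgetter

from lsst.daf.butler import Butler

butler = Butler.from_config("REPO_PATH")
with butler._query() as query:
    results = query.datasets("bias", collections="CALIB_COLLECTION", find_first=False)
    try:
        refs = list(results.with_dimension_records())
    except Exception as e:
        e.add_note("Error from querying dataset type 'bias' and collection CALIB_COLLECTION")
        raise

# The export table is now sorted by dataset type name and then data ID for stable output.
for ref in sorted(refs, key=attrgetter("datasetType.name", "dataId")):
    print(ref.datasetType.name, ref.dataId, ref.run)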
62 changes: 38 additions & 24 deletions python/lsst/daf/butler/script/queryDataIds.py
@@ -28,7 +28,6 @@

import logging
from collections.abc import Iterable
from types import EllipsisType
from typing import TYPE_CHECKING

import numpy as np
@@ -131,14 +130,20 @@ def queryDataIds(
Docstring for supported parameters is the same as
`~lsst.daf.butler.Registry.queryDataIds`.
"""
if offset:
raise RuntimeError("Offset is no longer supported by the query system.")

butler = Butler.from_config(repo, without_datastore=True)

dataset_types = []
if datasets:
dataset_types = list(butler.registry.queryDatasetTypes(datasets))

if datasets and collections and not dimensions:
# Determine the dimensions relevant to all given dataset types.
# Since we are going to AND together all dimensions, we can not
# seed the result with an empty set.
dataset_type_dimensions: DimensionGroup | None = None
dataset_types = list(butler.registry.queryDatasetTypes(datasets))
for dataset_type in dataset_types:
if dataset_type_dimensions is None:
# Seed with dimensions of first dataset type.
@@ -160,26 +165,35 @@
dimensions = set(dataset_type_dimensions.names)
_LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets)

query_collections: Iterable[str] | EllipsisType | None = None
if datasets:
query_collections = collections or ...
results = butler.registry.queryDataIds(
dimensions, datasets=datasets, where=where, collections=query_collections
)

if order_by:
results = results.order_by(*order_by)
if limit > 0:
new_offset = offset if offset > 0 else None
results = results.limit(limit, new_offset)

if results.any(exact=False):
if results.dimensions:
table = _Table(results)
if not table.dataIds:
return None, "Post-query region filtering removed all rows, since nothing overlapped."
return table.getAstropyTable(not order_by), None
with butler._query() as query:
if datasets:
# Need to constrain results based on dataset type and collection.
query_collections = collections or ...

expanded_collections = butler.registry.queryCollections(query_collections)

sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections)
for dt in dataset_types:
sub_query = sub_query.join_dataset_search(dt, collections=expanded_collections)

results = sub_query.data_ids(dimensions)
else:
results = query.data_ids(dimensions)

if where:
results = results.where(where)
if order_by:
results = results.order_by(*order_by)
if limit > 0:
results = results.limit(limit)

if results.any(exact=False):
if results.dimensions:
table = _Table(results)
if not table.dataIds:
return None, "Post-query region filtering removed all rows, since nothing overlapped."
return table.getAstropyTable(not order_by), None
else:
return None, "Result has one logical row but no columns because no dimensions were requested."
else:
return None, "Result has one logical row but no columns because no dimensions were requested."
else:
return None, "\n".join(results.explain_no_results())
return None, "\n".join(results.explain_no_results())
