diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 5d40d20de0..4789a62010 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -54,6 +54,17 @@ class CollectionInfo(BaseModel): `None` if the parents were not requested. """ + dataset_types: frozenset[str] | None = None + """Names of any dataset types associated with datasets in this collection. + + `None` if no dataset type information was requested. + """ + governors: dict[str, frozenset[str]] | None = None + """Values of any governor dimensions associated with datasets in this + collection. + + `None` if no governor information was requested. + """ def __lt__(self, other: Any) -> bool: """Compare objects by collection name.""" @@ -271,6 +282,7 @@ def x_query_info( flatten_chains: bool = False, include_chains: bool | None = None, include_parents: bool = False, + include_summary: bool = False, ) -> Sequence[CollectionInfo]: """Query the butler for collections matching an expression and return detailed information about those collections. @@ -293,6 +305,9 @@ def x_query_info( include either CHAINED collections or their children, but not both. include_parents : `bool`, optional Whether the returned information includes parents. + include_summary : `bool`, optional + Whether the returned information includes dataset type and + governor information for the collections. Returns ------- @@ -311,7 +326,9 @@ def x_query_info( raise NotImplementedError() @abstractmethod - def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: + def get_info( + self, name: str, include_parents: bool = False, include_summary: bool = False + ) -> CollectionInfo: """Obtain information for a specific collection. Parameters @@ -320,6 +337,9 @@ def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: The name of the collection of interest. 
include_parents : `bool`, optional If `True` any parents of this collection will be included. + include_summary : `bool`, optional + If `True` dataset type names and governor dimensions of datasets + stored in this collection will be included in the result. Returns ------- diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 30b3b401bc..15a3f5d3a1 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -104,6 +104,7 @@ def x_query_info( flatten_chains: bool = False, include_chains: bool | None = None, include_parents: bool = False, + include_summary: bool = False, ) -> Sequence[CollectionInfo]: info = [] with self._registry.caching_context(): @@ -115,10 +116,14 @@ def x_query_info( flattenChains=flatten_chains, includeChains=include_chains, ): - info.append(self.get_info(name, include_parents=include_parents)) + info.append( + self.get_info(name, include_parents=include_parents, include_summary=include_summary) + ) return info - def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: + def get_info( + self, name: str, include_parents: bool = False, include_summary: bool = False + ) -> CollectionInfo: record = self._registry.get_collection_record(name) doc = self._registry.getCollectionDocumentation(name) or "" children: tuple[str, ...] 
= tuple() @@ -128,7 +133,22 @@ def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: parents: set[str] | None = None if include_parents: parents = self._registry.getCollectionParentChains(name) - return CollectionInfo(name=name, type=record.type, doc=doc, parents=parents, children=children) + governors: dict[str, frozenset[str]] | None = None + dataset_types: Set[str] | None = None + if include_summary: + summary = self._registry.getCollectionSummary(name) + dataset_types = frozenset([dt.name for dt in summary.dataset_types]) + governors = {k: frozenset(v) for k, v in summary.governors.items()} + + return CollectionInfo( + name=name, + type=record.type, + doc=doc, + parents=parents, + children=children, + dataset_types=dataset_types, + governors=governors, + ) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: return self._registry.registerCollection(name, type, doc) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index 35b3d67be0..c5cddbef17 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -78,6 +78,7 @@ def x_query_info( flatten_chains: bool = False, include_chains: bool | None = None, include_parents: bool = False, + include_summary: bool = False, ) -> Sequence[CollectionInfo]: # This should become a single call on the server in the future. 
if collection_types is None: @@ -90,14 +91,30 @@ def x_query_info( flattenChains=flatten_chains, includeChains=include_chains, ): - info.append(self.get_info(name, include_parents=include_parents)) + info.append(self.get_info(name, include_parents=include_parents, include_summary=include_summary)) return info - def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: + def get_info( + self, name: str, include_parents: bool = False, include_summary: bool = False + ) -> CollectionInfo: info = self._registry._get_collection_info(name, include_doc=True, include_parents=include_parents) doc = info.doc or "" children = info.children or () - return CollectionInfo(name=name, type=info.type, doc=doc, parents=info.parents, children=children) + governors: dict[str, frozenset[str]] | None = None + dataset_types: Set[str] | None = None + if include_summary: + summary = self._registry.getCollectionSummary(name) + dataset_types = frozenset([dt.name for dt in summary.dataset_types]) + governors = {k: frozenset(v) for k, v in summary.governors.items()} + return CollectionInfo( + name=name, + type=info.type, + doc=doc, + parents=info.parents, + children=children, + dataset_types=dataset_types, + governors=governors, + ) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: raise NotImplementedError("Not yet available.") diff --git a/python/lsst/daf/butler/tests/hybrid_butler_collections.py b/python/lsst/daf/butler/tests/hybrid_butler_collections.py index 4ac8688e6d..6c9e438e79 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler_collections.py +++ b/python/lsst/daf/butler/tests/hybrid_butler_collections.py @@ -84,6 +84,7 @@ def x_query_info( flatten_chains: bool = False, include_chains: bool | None = None, include_parents: bool = False, + include_summary: bool = False, ) -> Sequence[CollectionInfo]: return self._hybrid._remote_butler.collections.x_query_info( expression, @@ -91,10 +92,15 @@ def 
x_query_info( flatten_chains=flatten_chains, include_chains=include_chains, include_parents=include_parents, + include_summary=include_summary, ) - def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: - return self._hybrid._remote_butler.collections.get_info(name, include_parents=include_parents) + def get_info( + self, name: str, include_parents: bool = False, include_summary: bool = False + ) -> CollectionInfo: + return self._hybrid._remote_butler.collections.get_info( + name, include_parents=include_parents, include_summary=include_summary + ) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: return self._hybrid._direct_butler.collections.register(name, type=type, doc=doc)