Skip to content

Commit

Permalink
Add dataset_types and governors to CollectionInfo
Browse files (browse the repository at this point in the history)
  • Loading branch information
timj committed Aug 16, 2024
1 parent 32d0753 commit 141f8f1
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 9 deletions.
22 changes: 21 additions & 1 deletion python/lsst/daf/butler/_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,17 @@ class CollectionInfo(BaseModel):
`None` if the parents were not requested.
"""
dataset_types: frozenset[str] | None = None
"""Names of any dataset types associated with datasets in this collection.
`None` if no dataset type information was requested.
"""
governors: dict[str, frozenset[str]] | None = None
"""Values of any governor dimensions associated with datasets in this
collection.
`None` if no governor information was requested.
"""

def __lt__(self, other: Any) -> bool:
"""Compare objects by collection name."""
Expand Down Expand Up @@ -271,6 +282,7 @@ def x_query_info(
flatten_chains: bool = False,
include_chains: bool | None = None,
include_parents: bool = False,
include_summary: bool = False,
) -> Sequence[CollectionInfo]:
"""Query the butler for collections matching an expression and
return detailed information about those collections.
Expand All @@ -293,6 +305,9 @@ def x_query_info(
include either CHAINED collections or their children, but not both.
include_parents : `bool`, optional
Whether the returned information includes parents.
include_summary : `bool`, optional
Whether the returned information includes dataset type and
governor information for the collections.
Returns
-------
Expand All @@ -311,7 +326,9 @@ def x_query_info(
raise NotImplementedError()

@abstractmethod
def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo:
def get_info(
self, name: str, include_parents: bool = False, include_summary: bool = False
) -> CollectionInfo:
"""Obtain information for a specific collection.
Parameters
Expand All @@ -320,6 +337,9 @@ def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo:
The name of the collection of interest.
include_parents : `bool`, optional
If `True` any parents of this collection will be included.
include_summary : `bool`, optional
If `True`, dataset type names and governor dimension values of datasets
stored in this collection will be included in the result.
Returns
-------
Expand Down
26 changes: 23 additions & 3 deletions python/lsst/daf/butler/direct_butler/_direct_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def x_query_info(
flatten_chains: bool = False,
include_chains: bool | None = None,
include_parents: bool = False,
include_summary: bool = False,
) -> Sequence[CollectionInfo]:
info = []
with self._registry.caching_context():
Expand All @@ -115,10 +116,14 @@ def x_query_info(
flattenChains=flatten_chains,
includeChains=include_chains,
):
info.append(self.get_info(name, include_parents=include_parents))
info.append(
self.get_info(name, include_parents=include_parents, include_summary=include_summary)
)
return info

def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo:
def get_info(
self, name: str, include_parents: bool = False, include_summary: bool = False
) -> CollectionInfo:
record = self._registry.get_collection_record(name)
doc = self._registry.getCollectionDocumentation(name) or ""
children: tuple[str, ...] = tuple()
Expand All @@ -128,7 +133,22 @@ def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo:
parents: set[str] | None = None
if include_parents:
parents = self._registry.getCollectionParentChains(name)
return CollectionInfo(name=name, type=record.type, doc=doc, parents=parents, children=children)
governors: dict[str, frozenset[str]] | None = None
dataset_types: Set[str] | None = None
if include_summary:
summary = self._registry.getCollectionSummary(name)
dataset_types = frozenset([dt.name for dt in summary.dataset_types])
governors = {k: frozenset(v) for k, v in summary.governors.items()}

Check warning on line 141 in python/lsst/daf/butler/direct_butler/_direct_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_butler/_direct_butler_collections.py#L139-L141

Added lines #L139 - L141 were not covered by tests

return CollectionInfo(
name=name,
type=record.type,
doc=doc,
parents=parents,
children=children,
dataset_types=dataset_types,
governors=governors,
)

def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool:
return self._registry.registerCollection(name, type, doc)
Expand Down
23 changes: 20 additions & 3 deletions python/lsst/daf/butler/remote_butler/_remote_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def x_query_info(
flatten_chains: bool = False,
include_chains: bool | None = None,
include_parents: bool = False,
include_summary: bool = False,
) -> Sequence[CollectionInfo]:
# This should become a single call on the server in the future.
if collection_types is None:
Expand All @@ -90,14 +91,30 @@ def x_query_info(
flattenChains=flatten_chains,
includeChains=include_chains,
):
info.append(self.get_info(name, include_parents=include_parents))
info.append(self.get_info(name, include_parents=include_parents, include_summary=include_summary))
return info

def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo:
def get_info(
self, name: str, include_parents: bool = False, include_summary: bool = False
) -> CollectionInfo:
info = self._registry._get_collection_info(name, include_doc=True, include_parents=include_parents)
doc = info.doc or ""
children = info.children or ()
return CollectionInfo(name=name, type=info.type, doc=doc, parents=info.parents, children=children)
governors: dict[str, frozenset[str]] | None = None
dataset_types: Set[str] | None = None
if include_summary:
summary = self._registry.getCollectionSummary(name)
dataset_types = frozenset([dt.name for dt in summary.dataset_types])
governors = {k: frozenset(v) for k, v in summary.governors.items()}

Check warning on line 108 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L106-L108

Added lines #L106 - L108 were not covered by tests
return CollectionInfo(
name=name,
type=info.type,
doc=doc,
parents=info.parents,
children=children,
dataset_types=dataset_types,
governors=governors,
)

def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool:
raise NotImplementedError("Not yet available.")
Expand Down
10 changes: 8 additions & 2 deletions python/lsst/daf/butler/tests/hybrid_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,23 @@ def x_query_info(
flatten_chains: bool = False,
include_chains: bool | None = None,
include_parents: bool = False,
include_summary: bool = False,
) -> Sequence[CollectionInfo]:
return self._hybrid._remote_butler.collections.x_query_info(
expression,
collection_types=collection_types,
flatten_chains=flatten_chains,
include_chains=include_chains,
include_parents=include_parents,
include_summary=include_summary,
)

def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo:
return self._hybrid._remote_butler.collections.get_info(name, include_parents=include_parents)
def get_info(
self, name: str, include_parents: bool = False, include_summary: bool = False
) -> CollectionInfo:
return self._hybrid._remote_butler.collections.get_info(
name, include_parents=include_parents, include_summary=include_summary
)

def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool:
return self._hybrid._direct_butler.collections.register(name, type=type, doc=doc)
Expand Down

0 comments on commit 141f8f1

Please sign in to comment.