Skip to content

Commit

Permalink
Add a way to mark which dimensions populate others.
Browse files Browse the repository at this point in the history
In the future, this will be used to make it easier to export dimension
data without knowing the details of the dimension combinations that
represent many-to-many joins (DM-34838).

There's no real gain to updating the dimensions configuration before
that happens, but merging these changes early is useful because it
will make software versions with just this change much better able to
handle future data repositories that use them in their dimensions
configuration, even if the associated functionality isn't available as
a result.
  • Loading branch information
TallJimbo committed Jul 28, 2023
1 parent a997e9e commit 2192768
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 0 deletions.
1 change: 1 addition & 0 deletions python/lsst/daf/butler/core/dimensions/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def _extractElementVisitors(self) -> Iterator[DimensionConstructionVisitor]:
metadata=metadata,
alwaysJoin=subconfig.get("always_join", False),
uniqueKeys=uniqueKeys,
populated_by=subconfig.get("populated_by", None),
)

def _extractTopologyVisitors(self) -> Iterator[DimensionConstructionVisitor]:
Expand Down
15 changes: 15 additions & 0 deletions python/lsst/daf/butler/core/dimensions/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,12 @@ def __init__(
implied: NamedValueAbstractSet[Dimension],
metadata: NamedValueAbstractSet[ddl.FieldSpec],
alwaysJoin: bool,
populated_by: Dimension | None,
):
super().__init__(name, storage=storage, implied=implied, metadata=metadata)
self._required = required
self._alwaysJoin = alwaysJoin
self._populated_by = populated_by

@property
def required(self) -> NamedValueAbstractSet[Dimension]:
Expand All @@ -403,6 +405,11 @@ def alwaysJoin(self) -> bool:
# Docstring inherited from DimensionElement.
return self._alwaysJoin

@property
def populated_by(self) -> Dimension | None:
    # Docstring inherited.
    # Returns the ``populated_by`` value given at construction, unchanged;
    # this is `None` when no populating dimension was provided.
    return self._populated_by


class DatabaseDimensionElementConstructionVisitor(DimensionConstructionVisitor):
"""Construction visitor for database dimension and dimension combination.
Expand Down Expand Up @@ -439,6 +446,9 @@ class DatabaseDimensionElementConstructionVisitor(DimensionConstructionVisitor):
relationship between those dimensions that must always be satisfied.
Should only be provided when a `DimensionCombination` is being
constructed.
populated_by : `str`, optional
Name of the dimension that this element's records are always inserted,
exported, and imported alongside.
"""

def __init__(
Expand All @@ -450,6 +460,7 @@ def __init__(
metadata: Iterable[ddl.FieldSpec] = (),
uniqueKeys: Iterable[ddl.FieldSpec] = (),
alwaysJoin: bool = False,
populated_by: str | None = None,
):
super().__init__(name)
self._storage = storage
Expand All @@ -458,6 +469,7 @@ def __init__(
self._metadata = NamedValueSet(metadata).freeze()
self._uniqueKeys = NamedValueSet(uniqueKeys).freeze()
self._alwaysJoin = alwaysJoin
self._populated_by = populated_by

def hasDependenciesIn(self, others: Set[str]) -> bool:
# Docstring inherited from DimensionConstructionVisitor.
Expand Down Expand Up @@ -501,5 +513,8 @@ def visit(self, builder: DimensionConstructionBuilder) -> None:
implied=implied.freeze(),
metadata=self._metadata,
alwaysJoin=self._alwaysJoin,
populated_by=(
builder.dimensions[self._populated_by] if self._populated_by is not None else None
),
)
builder.elements.add(combination)
21 changes: 21 additions & 0 deletions python/lsst/daf/butler/core/dimensions/_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,22 @@ def alwaysJoin(self) -> bool:
"""
return False

@property
@abstractmethod
def populated_by(self) -> Dimension | None:
    """The dimension that this element's records are always inserted,
    exported, and imported alongside.

    Notes
    -----
    When this is `None` (as it will be, at least at first, for any data
    repositories created before this attribute was added), records for
    this element will often need to be exported manually when datasets
    associated with some other related dimension are exported, in order for
    the post-import data repository to function as expected.
    """
    # Abstract: concrete element classes must override this property.
    raise NotImplementedError()


class Dimension(DimensionElement):
"""A dimension.
Expand Down Expand Up @@ -376,6 +392,11 @@ def alternateKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
_, *alternateKeys = self.uniqueKeys
return NamedValueSet(alternateKeys).freeze()

@property
def populated_by(self) -> Dimension:
    # Docstring inherited.
    # A dimension is reported as populating itself, so this override never
    # returns `None` (hence the narrower return annotation).
    return self


class DimensionCombination(DimensionElement):
"""Element with extra information.
Expand Down
14 changes: 14 additions & 0 deletions python/lsst/daf/butler/core/dimensions/_universe.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import logging
import math
import pickle
from collections import defaultdict
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any, ClassVar, TypeVar

Expand Down Expand Up @@ -170,6 +171,11 @@ def __new__(
# and implied. This is used for encode/decode.
self._dimensionIndices = {name: i for i, name in enumerate(self._dimensions.names)}

self._populates = defaultdict(NamedValueSet)
for element in self._elements:
if element.populated_by is not None:
self._populates[element.populated_by.name].add(element)

return self

@property
Expand Down Expand Up @@ -483,6 +489,12 @@ def getEncodeLength(self) -> int:
"""
return math.ceil(len(self._dimensions) / 8)

def get_elements_populated_by(self, dimension: Dimension) -> NamedValueAbstractSet[DimensionElement]:
    """Return the elements whose records are populated by a dimension.

    Parameters
    ----------
    dimension : `Dimension`
        Dimension to look up; only its ``name`` is used as the lookup key.

    Returns
    -------
    elements : `NamedValueAbstractSet` [ `DimensionElement` ]
        The `DimensionElement` objects whose
        `~DimensionElement.populated_by` attribute is the given dimension.
        Empty if no element is populated by ``dimension``.
    """
    populates = self._populates
    name = dimension.name
    if name in populates:
        return populates[name]
    # ``_populates`` is a `~collections.defaultdict`: indexing it with an
    # unknown name would insert a new key, so a read-only query would
    # silently mutate this universe.  Build the empty result from the
    # mapping's own factory instead, without storing it.
    factory = populates.default_factory
    assert factory is not None, "_populates is always built with a default factory"
    return factory()

@classmethod
def _unpickle(cls, version: int, namespace: str | None = None) -> DimensionUniverse:
"""Return an unpickled dimension universe.
Expand Down Expand Up @@ -542,6 +554,8 @@ def __deepcopy__(self, memo: dict) -> DimensionUniverse:

_packers: dict[str, DimensionPackerFactory]

_populates: defaultdict[str, NamedValueSet[DimensionElement]]

_version: int

_namespace: str

0 comments on commit 2192768

Please sign in to comment.