From 5a122154131cd9b765d1d0a2bf1a985d2e5c24b9 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 1 Feb 2024 16:10:05 -0700 Subject: [PATCH] Restore group-by-metadata visit definition with group dimension This requires that we call a method on the Instrument class to get the group_id from the name. --- python/lsst/obs/base/defineVisits.py | 57 ++++++++++++++++++++-------- tests/test_defineVisits.py | 13 +++++-- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/python/lsst/obs/base/defineVisits.py b/python/lsst/obs/base/defineVisits.py index 98984a50..d6f3cd65 100644 --- a/python/lsst/obs/base/defineVisits.py +++ b/python/lsst/obs/base/defineVisits.py @@ -239,7 +239,9 @@ def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[Di raise NotImplementedError() @abstractmethod - def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionData]: + def group( + self, exposures: list[DimensionRecord], instrument: Instrument + ) -> Iterable[VisitDefinitionData]: """Group the given exposures into visits. Parameters @@ -247,6 +249,9 @@ def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionDat exposures : `list` [ `DimensionRecord` ] DimensionRecords (for the 'exposure' dimension) describing the exposures to group. + instrument : `~lsst.pipe.base.Instrument` + Instrument specification that can be used to optionally support + some visit ID definitions. Returns ------- @@ -669,6 +674,7 @@ def run( # instrument in play, and check for non-science exposures. exposures = [] instruments = set() + instrument_cls_name: str | None = None for dataId in data_id_set: record = dataId.records["exposure"] assert record is not None, "Guaranteed by expandDataIds call earlier." @@ -681,6 +687,9 @@ def run( f"{record.observation_type}, but is not on sky." ) instruments.add(dataId["instrument"]) + instrument_record = dataId.records["instrument"] + if instrument_record is not None: + instrument_cls_name = instrument_record.class_name exposures.append(record) if not exposures: self.log.info("No on-sky exposures found after filtering.") @@ -691,6 +700,12 @@ def run( f"from the same instrument; got {instruments}." ) (instrument,) = instruments + + # Might need the instrument class for later depending on universe + # and grouping scheme. + assert instrument_cls_name is not None, "Instrument must be defined in this dataId" + instrument_helper = Instrument.from_string(instrument_cls_name) + # Ensure the visit_system our grouping algorithm uses is in the # registry, if it wasn't already. visitSystems = self.groupExposures.getVisitSystems() @@ -710,7 +725,7 @@ def run( # Group exposures into visits, delegating to subtask. self.log.info("Grouping %d exposure(s) into visits.", len(exposures)) - definitions = list(self.groupExposures.group(exposures)) + definitions = list(self.groupExposures.group(exposures, instrument_helper)) # Iterate over visits, compute regions, and insert dimension data, one # transaction per visit. If a visit already exists, we skip all other # inserts. @@ -857,7 +872,9 @@ def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[Di # No grouping. return {exposure.id: [exposure] for exposure in exposures} - def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionData]: + def group( + self, exposures: list[DimensionRecord], instrument: Instrument + ) -> Iterable[VisitDefinitionData]: # Docstring inherited from GroupExposuresTask. visit_systems = {VisitSystem.from_name("one-to-one")} for exposure in exposures: @@ -940,19 +957,27 @@ def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[Di groups[getattr(exposure, group_key)].append(exposure) return groups - def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionData]: + def group( + self, exposures: list[DimensionRecord], instrument: Instrument + ) -> Iterable[VisitDefinitionData]: # Docstring inherited from GroupExposuresTask. visit_systems = {VisitSystem.from_name("by-group-metadata")} groups = self.group_exposures(exposures) + has_group_dimension: bool | None = None for visitName, exposuresInGroup in groups.items(): - instrument = exposuresInGroup[0].instrument - visitId = exposuresInGroup[0].group_id - assert all( - e.group_id == visitId for e in exposuresInGroup - ), "Grouping by exposure.group_name does not yield consistent group IDs" + instrument_name = exposuresInGroup[0].instrument + assert instrument_name == instrument.getName(), "Inconsistency in instrument name" + visit_ids: set[int] = set() + if has_group_dimension is None: + has_group_dimension = hasattr(exposuresInGroup[0], "group") + if has_group_dimension: + visit_ids = {instrument.group_name_to_group_id(e.group) for e in exposuresInGroup} + else: + visit_ids = {e.group_id for e in exposuresInGroup} + assert len(visit_ids) == 1, "Grouping by exposure group does not yield consistent group IDs" yield VisitDefinitionData( - instrument=instrument, - id=visitId, + instrument=instrument_name, + id=visit_ids.pop(), name=visitName, exposures=exposuresInGroup, visit_systems=visit_systems, @@ -1028,14 +1053,16 @@ def group_exposures(self, exposures: list[DimensionRecord]) -> dict[Any, list[Di groups[exposure.day_obs, exposure.seq_start, exposure.seq_end].append(exposure) return groups - def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionData]: + def group( + self, exposures: list[DimensionRecord], instrument: Instrument + ) -> Iterable[VisitDefinitionData]: # Docstring inherited from GroupExposuresTask. system_one_to_one = VisitSystem.from_name("one-to-one") system_seq_start_end = VisitSystem.from_name("by-seq-start-end") groups = self.group_exposures(exposures) for visit_key, exposures_in_group in groups.items(): - instrument = exposures_in_group[0].instrument + instrument_name = exposures_in_group[0].instrument # It is possible that the first exposure in a visit has not # been ingested. This can be determined and if that is the case @@ -1082,7 +1109,7 @@ def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionDat visit_id = int(f"9{visit_id}") yield VisitDefinitionData( - instrument=instrument, + instrument=instrument_name, id=visit_id, name=visit_name, exposures=[exposure], @@ -1096,7 +1123,7 @@ def group(self, exposures: list[DimensionRecord]) -> Iterable[VisitDefinitionDat visit_id = first.id yield VisitDefinitionData( - instrument=instrument, + instrument=instrument_name, id=visit_id, name=visit_name, exposures=exposures_in_group, diff --git a/tests/test_defineVisits.py b/tests/test_defineVisits.py index e09281fe..c6009e82 100644 --- a/tests/test_defineVisits.py +++ b/tests/test_defineVisits.py @@ -210,7 +210,14 @@ def assertVisits(self): """Check that the visits were registered as expected.""" visits = list(self.butler.registry.queryDimensionRecords("visit")) self.assertEqual(len(visits), 2) - self.assertEqual({visit.id for visit in visits}, {2291434132550000, 2291434871810000}) + + # The visit ID itself depends on which universe we are using. + # It is either calculated or comes from the JSON record. + if "group" in self.butler.dimensions["exposure"].implied: + visit_ids = [20220406025653255, 20220406025807181] + else: + visit_ids = [2291434132550000, 2291434871810000] + self.assertEqual({visit.id for visit in visits}, set(visit_ids)) # Ensure that the definitions are correct (ignoring order). defmap = defaultdict(set) @@ -221,8 +228,8 @@ def assertVisits(self): self.assertEqual( dict(defmap), { - 2291434132550000: {2022040500347}, - 2291434871810000: {2022040500348, 2022040500349}, + visit_ids[0]: {2022040500347}, + visit_ids[1]: {2022040500348, 2022040500349}, }, )