From 5825a2619b8b3ca6818238d62522e431b956e9c7 Mon Sep 17 00:00:00 2001 From: "E. Belfer" <37471869+e-belfer@users.noreply.github.com> Date: Wed, 15 Jan 2025 09:58:55 -0500 Subject: [PATCH] Pass `sources` to `DataSource.from_id()` (#4013) * Pass sources to from_id() * Update release notes * Fix from_field_namespace --- docs/release_notes.rst | 9 +++++++++ src/pudl/metadata/classes.py | 8 ++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 91c56adec..ec0fbcfbc 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -32,6 +32,15 @@ Bug Fixes Major Dependency Updates ^^^^^^^^^^^^^^^^^^^^^^^^ +Quality of Life Improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* We added a ``sources`` parameter to ``pudl.metadata.classes.DataSource.from_id()`` + in order to make it possible to use the `pudl-archiver + <https://github.com/catalyst-cooperative/pudl-archiver>`__ repository to + archive datasets that won't necessarily be ingested into PUDL. See `this PUDL archiver + issue `__ and PRs + :pr:`4003` and :pr:`4013`. + ..
_release-v2024.11.0: --------------------------------------------------------------------------------------- diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index a8357f6be..bb73ca298 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -1039,22 +1039,22 @@ def from_field_namespace( ) -> list["DataSource"]: """Return list of DataSource objects by field namespace.""" return [ - cls(**cls.dict_from_id(name)) + cls(**cls.dict_from_id(name, sources)) for name, val in sources.items() if val.get("field_namespace") == x ] @staticmethod - def dict_from_id(x: str, sources: dict[str, Any] = SOURCES) -> dict: + def dict_from_id(x: str, sources: dict[str, Any]) -> dict: """Look up the source by source name in the metadata.""" # If ID ends with _xbrl strip end to find data source lookup_id = x.replace("_xbrl", "") return {"name": x, **copy.deepcopy(sources[lookup_id])} @classmethod - def from_id(cls, x: str) -> "DataSource": + def from_id(cls, x: str, sources: dict[str, Any] = SOURCES) -> "DataSource": """Construct Source by source name in the metadata.""" - return cls(**cls.dict_from_id(x)) + return cls(**cls.dict_from_id(x, sources=sources)) class ResourceHarvest(PudlMeta):