diff --git a/src/pudl/etl/cli.py b/src/pudl/etl/cli.py index 1e3e9c2ff..182de2a10 100644 --- a/src/pudl/etl/cli.py +++ b/src/pudl/etl/cli.py @@ -23,7 +23,10 @@ def pudl_etl_job_factory( - logfile: str | None = None, loglevel: str = "INFO", process_epacems: bool = True + logfile: str | None = None, + loglevel: str = "INFO", + process_epacems: bool = True, + base_job: str = "etl_full", ) -> Callable[[], JobDefinition]: """Factory for parameterizing a reconstructable pudl_etl job. @@ -36,11 +39,11 @@ def pudl_etl_job_factory( The job definition to be executed. """ - def get_pudl_etl_job(job_name: str | None = None): + def get_pudl_etl_job(): """Create an pudl_etl_job wrapped by to be wrapped by reconstructable.""" pudl.logging_helpers.configure_root_logger(logfile=logfile, loglevel=loglevel) - if job_name is None: - job_name = "etl_full_no_cems" if not process_epacems else "etl_full" + cems_suffix = "" if process_epacems else "_no_cems" + job_name = f"{base_job}{cems_suffix}" return defs.get_job_def(job_name) return get_pudl_etl_job diff --git a/src/pudl/transform/vcerare.py b/src/pudl/transform/vcerare.py index 840651f95..de109fdb8 100644 --- a/src/pudl/transform/vcerare.py +++ b/src/pudl/transform/vcerare.py @@ -365,6 +365,8 @@ def check_rows(context: AssetCheckExecutionContext) -> AssetCheckResult: logger.info("Check VCE RARE hourly table is the expected length") # Define row counts for fast/full etl + # TODO 2024-12-27: make this check row counts per year instead of having + # two different counts based on job name - less brittle. row_counts = { "etl_full": 136437000, "etl_fast": 27287400, diff --git a/test/conftest.py b/test/conftest.py index 20837864e..c52ba73c8 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -363,7 +363,7 @@ def pudl_io_manager( md = PUDL_PACKAGE.to_sql() md.create_all(engine) # Run the ETL and generate a new PUDL SQLite DB for testing: - execute_result = pudl_etl_job_factory()("etl_fast").execute_in_process( + execute_result = pudl_etl_job_factory(base_job="etl_fast")().execute_in_process( run_config={ "resources": { "dataset_settings": {