From 878b12292c2364ebd1382e54e36b6e97eb60feaa Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Sun, 20 Oct 2024 09:48:04 -0600 Subject: [PATCH] Post v2024.10.0 release errata cleanup (#3917) * Consolidate timeseries row groups in VCE RARE * Fix bad S3 download link for VCE RARE parquet file. * Update docs/data_access.rst Co-authored-by: E. Belfer <37471869+e-belfer@users.noreply.github.com> --------- Co-authored-by: E. Belfer <37471869+e-belfer@users.noreply.github.com> --- README.rst | 16 ++++++++++++++-- docs/data_access.rst | 2 +- src/pudl/metadata/resources/vcerare.py | 8 ++++---- src/pudl/transform/vcerare.py | 8 ++++++-- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index a06b64d262..27a08f2dd3 100644 --- a/README.rst +++ b/README.rst @@ -29,10 +29,22 @@ The Public Utility Data Liberation Project (PUDL) :alt: Schedule a 1-on-1 chat with us about PUDL. .. |mastodon| image:: https://img.shields.io/mastodon/follow/110855618428885893?domain=https%3A%2F%2Fmastodon.energy&style=social&color=%23000000&link=https%3A%2F%2Fmastodon.energy%2F%40catalystcoop :target: https://mastodon.energy/@catalystcoop - :alt: Follow CatalystCoop on Mastodon + :alt: Follow Catalyst Cooperative on Mastodon +.. |linkedin| image:: https://img.shields.io/badge/LinkedIn-0077B5?style=flat&logo=linkedin&logoColor=white + :target: https://linkedin.com/company/catalyst-cooperative/ + :alt: Follow Catalyst Cooperative on LinkedIn +.. |bluesky| image:: https://img.shields.io/badge/Bluesky-0285FF?logo=bluesky&logoColor=fff&style=flat + :target: https://bsky.app/profile/catalyst.coop + :alt: Follow @catalyst.coop on Bluesky +.. |kaggle| image:: https://img.shields.io/badge/Kaggle-20BEFF?style=flat&logo=Kaggle&logoColor=white + :target: https://www.kaggle.com/datasets/catalystcooperative/pudl-project + :alt: The PUDL Dataset on Kaggle +.. 
|aws| image:: https://img.shields.io/badge/Amazon_AWS-FF9900?style=flat&logo=amazonaws&logoColor=white + :target: https://registry.opendata.aws/catalyst-cooperative-pudl/ + :alt: PUDL in the AWS Open Data Registry |repo-status| |pytest| |codecov| |rtd| |ruff| |pre-commit-ci| |zenodo-doi| -|office-hours| |mastodon| +|office-hours| |mastodon| |linkedin| |bluesky| |kaggle| |aws| What is PUDL? ------------- diff --git a/docs/data_access.rst b/docs/data_access.rst index 9c6c667756..8a6ac4507e 100644 --- a/docs/data_access.rst +++ b/docs/data_access.rst @@ -130,7 +130,7 @@ so we have moved to publishing all our hourly tables using the compressed, colum * `FERC-714 Hourly Estimated State Demand `__ * `FERC-714 Hourly Planning Area Demand `__ * `GridPath RA Toolkit Hourly Available Capacity Factors `__ -* `VCE Resoruce Adequacy Renewable Energy Dataset `__ +* `VCE Resource Adequacy Renewable Energy (RARE) Dataset `__ Raw FERC DBF & XBRL data converted to SQLite ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/pudl/metadata/resources/vcerare.py b/src/pudl/metadata/resources/vcerare.py index 533d69d7b8..8391ca407d 100644 --- a/src/pudl/metadata/resources/vcerare.py +++ b/src/pudl/metadata/resources/vcerare.py @@ -42,19 +42,19 @@ ), "schema": { "fields": [ + "state", + "county_or_lake_name", "datetime_utc", - "hour_of_year", "report_year", + "hour_of_year", "county_id_fips", - "county_or_lake_name", - "state", "latitude", "longitude", "capacity_factor_solar_pv", "capacity_factor_onshore_wind", "capacity_factor_offshore_wind", ], - "primary_key": ["datetime_utc", "state", "county_or_lake_name"], + "primary_key": ["state", "county_or_lake_name", "datetime_utc"], }, "sources": ["vcerare"], "field_namespace": "vcerare", diff --git a/src/pudl/transform/vcerare.py b/src/pudl/transform/vcerare.py index 3ad2c9fa7e..8634b1abf4 100644 --- a/src/pudl/transform/vcerare.py +++ b/src/pudl/transform/vcerare.py @@ -282,8 +282,12 @@ def 
out_vcerare__hourly_available_capacity_factor( for df_name, df in raw_dict.items() } # Combine the data and perform a few last cleaning mechanisms - return _combine_all_cap_fac_dfs(clean_dict).pipe( - _combine_cap_fac_with_fips_df, fips_df + # Sort the data by primary key columns to produce compact row groups + return ( + _combine_all_cap_fac_dfs(clean_dict) + .pipe(_combine_cap_fac_with_fips_df, fips_df) + .sort_values(by=["state", "county_or_lake_name", "datetime_utc"]) + .reset_index(drop=True) )