diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 7b3e2c0d24..f593f0200d 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -10,5 +10,6 @@ updates: - package-ecosystem: "github-actions" directory: "/" + target-branch: "dev" schedule: interval: "daily" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4f0bfb7dd2..c381fa4ca1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ repos: # Quick content checks based on grepping for python specific patterns: - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.8.0 + rev: v1.9.0 hooks: - id: python-check-blanket-noqa # Prohibit overly broad QA exclusions. - id: python-no-eval # Never use eval() it's dangerous. @@ -35,7 +35,7 @@ repos: # Make sure import statements are sorted uniformly. - repo: https://github.com/pre-commit/mirrors-isort - rev: v5.8.0 + rev: v5.9.3 hooks: - id: isort @@ -59,7 +59,7 @@ repos: # Check for errors in restructuredtext (.rst) files under the doc hierarchy - repo: https://github.com/PyCQA/doc8 - rev: 0.9.0a1 + rev: 0.9.0 hooks: - id: doc8 args: [--config, tox.ini] diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000..138591f195 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,20 @@ +cff-version: 1.1.0 +message: "If you use PUDL, please cite it as indicated below." 
+authors: + - family-names: Selvans + given-names: Zane + orcid: https://orcid.org/0000-0002-9961-7208 + - family-names: Gosnell + given-names: Christina + - family-names: Winter + given-names: Steven + - family-names: Dunkle Werner + given-names: Karl + orcid: https://orcid.org/0000-0003-0523-7309 + - family-names: Schivley + given-names: Greg + orcid: https://orcid.org/0000-0002-8947-694X +title: "The Public Utility Data Liberation (PUDL) Project" +version: 0.3.2 +doi: 10.5281/zenodo.3404014 +date-released: 2020-02-17 diff --git a/README.rst b/README.rst index afa0e5fe11..ce80fd4850 100644 --- a/README.rst +++ b/README.rst @@ -20,10 +20,6 @@ The Public Utility Data Liberation Project (PUDL) :target: https://codecov.io/gh/catalyst-cooperative/pudl :alt: Codecov Test Coverage -.. image:: https://img.shields.io/codacy/grade/2fead07adef249c08288d0bafae7cbb5 - :target: https://app.codacy.com/app/zaneselvans/pudl - :alt: Codacy Grade - .. image:: https://img.shields.io/pypi/v/catalystcoop.pudl :target: https://pypi.org/project/catalystcoop.pudl/ :alt: PyPI Latest Version @@ -59,15 +55,15 @@ PUDL currently integrates data from: * `EIA Form 860 `__ (2004-2019) * `EIA Form 860m `__ (2020-2021) * `EIA Form 861 `__ (2001-2019) -* `EIA Form 923 `__ (2009-2019) +* `EIA Form 923 `__ (2001-2019) * `EPA Continuous Emissions Monitoring System (CEMS) `__ (1995-2020) * `FERC Form 1 `__ (1994-2019) * `FERC Form 714 `__ (2006-2019) * `US Census Demographic Profile 1 Geodatabase `__ (2010) -Thanks to support from the `Alfred P. 
Sloan Foundation Energy & Environment +Program `__, from +2021 to 2023 we will be integrating the following data as well: * `EIA Form 176 `__ (The Annual Report of Natural Gas Supply and Disposition) diff --git a/devtools/databeta.sh b/devtools/databeta.sh new file mode 100755 index 0000000000..5ceaa035fd --- /dev/null +++ b/devtools/databeta.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# A script to compile a Dockerized data release based on a user's local PUDL +# data environment. + +# Name of the directory to create the data release archive in +RELEASE_DIR=pudl-v0.4.0-2021-07-15 +# The PUDL working directory where we'll find the data to archive: +PUDL_IN=$HOME/code/catalyst/pudl-work +# Reference to an existing Docker image to pull +DOCKER_TAG="2021.03.27" + +echo "Started:" `date` +# Start with a clean slate: +rm -rf $RELEASE_DIR +mkdir -p $RELEASE_DIR +# The release container / environment is based on the pudl-examples repo: +git clone --depth 1 git@github.com:catalyst-cooperative/pudl-examples.git $RELEASE_DIR +rm -rf $RELEASE_DIR/.git* +# These directories are where the data will go. They're integrated with the +# Docker container that's defined in the pudl-examples repo: +mkdir -p $RELEASE_DIR/pudl_data +mkdir -p $RELEASE_DIR/user_data + +# Freeze the version of the Docker container: +cat $RELEASE_DIR/docker-compose.yml | sed -e "s/pudl-jupyter:latest/pudl-jupyter:$DOCKER_TAG/" > $RELEASE_DIR/new-docker-compose.yml +mv $RELEASE_DIR/new-docker-compose.yml $RELEASE_DIR/docker-compose.yml +# Set up a skeleton PUDL environment in the release dir: +pudl_setup $RELEASE_DIR/pudl_data + +# These are probably outdated now... see if they fail. +rm -rf $RELEASE_DIR/pudl_data/environment.yml +rm -rf $RELEASE_DIR/pudl_data/notebook + +# Copy over all of the pre-processed data +echo "Copying SQLite databases..." 
+cp -v $PUDL_IN/sqlite/ferc1.sqlite $RELEASE_DIR/pudl_data/sqlite/ +cp -v $PUDL_IN/sqlite/pudl.sqlite $RELEASE_DIR/pudl_data/sqlite/ +cp -v $PUDL_IN/sqlite/censusdp1tract.sqlite $RELEASE_DIR/pudl_data/sqlite/ +echo "Copying Parquet datasets..." +cp -r $PUDL_IN/parquet/epacems $RELEASE_DIR/pudl_data/parquet/ + +# Save the Docker image as a tarball so it can be archived with the data: +docker save catalystcoop/pudl-jupyter:$DOCKER_TAG -o $RELEASE_DIR/pudl-jupyter.tar + +# List the high-level contents of the archive so we can see what it contains: +find $RELEASE_DIR -maxdepth 3 + +# Create the archive +tar -czf $RELEASE_DIR.tgz $RELEASE_DIR + +echo "Finished:" `date` diff --git a/notebooks/work-in-progress/eia-extract-transform.ipynb b/devtools/eia-etl-debug.ipynb similarity index 92% rename from notebooks/work-in-progress/eia-extract-transform.ipynb rename to devtools/eia-etl-debug.ipynb index 17775db885..a6bb18d7ac 100644 --- a/notebooks/work-in-progress/eia-extract-transform.ipynb +++ b/devtools/eia-etl-debug.ipynb @@ -62,9 +62,9 @@ "outputs": [], "source": [ "eia923_tables = pc.pudl_tables['eia923']\n", - "eia923_years = [2018, 2019]\n", + "eia923_years = list(range(2001, 2020))\n", "eia860_tables = pc.pudl_tables['eia860']\n", - "eia860_years = [2018, 2019]" + "eia860_years = list(range(2004, 2020))" ] }, { @@ -83,6 +83,13 @@ "ds = pudl.workspace.datastore.Datastore(local_cache_path=Path(pudl_settings[\"data_dir\"]))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# EIA-860" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -105,7 +112,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Extract just the EIA-923" + "## Transform just the EIA-860" ] }, { @@ -115,15 +122,22 @@ "outputs": [], "source": [ "%%time\n", - "eia923_extractor = pudl.extract.eia923.Extractor(ds)\n", - "eia923_raw_dfs = eia923_extractor.extract(year=eia923_years)" + "eia860_transformed_dfs = pudl.transform.eia860.transform(\n", + " eia860_raw_dfs, 
eia860_tables=eia860_tables)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Transform just the EIA-860" + "# EIA-923" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extract just the EIA-923" ] }, { @@ -133,8 +147,8 @@ "outputs": [], "source": [ "%%time\n", - "eia860_transformed_dfs = pudl.transform.eia860.transform(\n", - " eia860_raw_dfs, eia860_tables=eia860_tables)" + "eia923_extractor = pudl.extract.eia923.Extractor(ds)\n", + "eia923_raw_dfs = eia923_extractor.extract(year=eia923_years)" ] }, { @@ -155,6 +169,13 @@ " eia923_raw_dfs, eia923_tables=eia923_tables)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Combined EIA Data" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -223,7 +244,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -237,7 +258,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.4" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/docs/api/pudl.output.glue.rst b/docs/api/pudl.output.glue.rst deleted file mode 100644 index 329d73c5c9..0000000000 --- a/docs/api/pudl.output.glue.rst +++ /dev/null @@ -1,7 +0,0 @@ -pudl.output.glue module -======================= - -.. 
automodule:: pudl.output.glue - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/pudl.output.rst b/docs/api/pudl.output.rst index 19f9285b76..aebfab7382 100644 --- a/docs/api/pudl.output.rst +++ b/docs/api/pudl.output.rst @@ -13,7 +13,6 @@ Submodules pudl.output.epacems pudl.output.ferc1 pudl.output.ferc714 - pudl.output.glue pudl.output.pudltabl Module contents diff --git a/docs/conf.py b/docs/conf.py index 68c553c4f5..f5afa5b2e2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ # -- Project information ----------------------------------------------------- project = 'PUDL' -copyright = '2016-2021, Catalyst Cooperative' # noqa: A001 +copyright = '2016-2021, Catalyst Cooperative, CC-BY-4.0' # noqa: A001 author = 'Catalyst Cooperative' # -- General configuration --------------------------------------------------- @@ -38,9 +38,15 @@ 'sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx_issues', + 'sphinx_reredirects', ] todo_include_todos = True +# Redirects to keep folks from hitting 404 errors: +redirects = { + "data_dictionary": "data_dictionaries/pudl_db.html", +} + # GitHub repo issues_github_path = "catalyst-cooperative/pudl" diff --git a/docs/data_sources/ferc1/ferc1_db_notes.csv b/docs/data_dictionaries/ferc1_db.csv similarity index 100% rename from docs/data_sources/ferc1/ferc1_db_notes.csv rename to docs/data_dictionaries/ferc1_db.csv diff --git a/docs/data_sources/ferc1_db_notes.rst b/docs/data_dictionaries/ferc1_db.rst similarity index 73% rename from docs/data_sources/ferc1_db_notes.rst rename to docs/data_dictionaries/ferc1_db.rst index d86e45a53a..d73c8f5d03 100644 --- a/docs/data_sources/ferc1_db_notes.rst +++ b/docs/data_dictionaries/ferc1_db.rst @@ -5,12 +5,6 @@ FERC Form 1 Data Dictionary We have mapped the Visual FoxPro DBF files to their corresponding FERC Form 1 database tables and provided a short description of the contents of each table here. 
-* :download:`A diagram of the 2015 FERC Form 1 Database (PDF) - ` -* :download:`Blank FERC Form 1 (PDF, to 2014-12-31) ` -* :download:`Blank FERC Form 1 (PDF, to 2019-12-31) ` -* :download:`Blank FERC Form 1 (PDF, to 2022-11-30) ` - .. note:: * The Table Names link to the contents of the database table on our `FERC Form 1 @@ -24,6 +18,6 @@ database tables and provided a short description of the contents of each table h Quarterly. A/Q if the data is reported both annually and quarterly. .. csv-table:: - :file: ferc1/ferc1_db_notes.csv + :file: ferc1_db.csv :header-rows: 1 :widths: auto diff --git a/docs/data_dictionaries/index.rst b/docs/data_dictionaries/index.rst new file mode 100644 index 0000000000..6ca44b0ad7 --- /dev/null +++ b/docs/data_dictionaries/index.rst @@ -0,0 +1,16 @@ +.. _data-dictionaries: + +Data Dictionaries +================= + +.. toctree:: + :caption: Data Processed & Cleaned by PUDL + :maxdepth: 1 + + pudl_db + +.. toctree:: + :caption: Raw, Unprocessed Data + :maxdepth: 1 + + ferc1_db diff --git a/docs/data_dictionary.rst b/docs/data_dictionaries/pudl_db.rst similarity index 93% rename from docs/data_dictionary.rst rename to docs/data_dictionaries/pudl_db.rst index 132cf374fb..edd5d7ce23 100644 --- a/docs/data_dictionary.rst +++ b/docs/data_dictionaries/pudl_db.rst @@ -3,11 +3,16 @@ PUDL Data Dictionary =============================================================================== +The following data tables have been cleaned and transformed by our ETL process. + + .. _assn_gen_eia_unit_epa: ------------------------------------------------------------------------------- assn_gen_eia_unit_epa ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. 
list-table:: @@ -32,6 +37,8 @@ assn_gen_eia_unit_epa ------------------------------------------------------------------------------- assn_plant_id_eia_epa ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -53,6 +60,8 @@ assn_plant_id_eia_epa ------------------------------------------------------------------------------- boiler_fuel_eia923 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -95,6 +104,8 @@ boiler_fuel_eia923 ------------------------------------------------------------------------------- boiler_generator_assn_eia860 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -124,13 +135,15 @@ boiler_generator_assn_eia860 - EIA-assigned unit identification code. * - unit_id_pudl - integer - - PUDL-assigned unit identification number. + - Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time. .. _boilers_entity_eia: ------------------------------------------------------------------------------- boilers_entity_eia ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -155,6 +168,8 @@ boilers_entity_eia ------------------------------------------------------------------------------- coalmine_eia923 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. 
list-table:: @@ -188,6 +203,8 @@ coalmine_eia923 ------------------------------------------------------------------------------- energy_source_eia923 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -209,6 +226,11 @@ energy_source_eia923 ------------------------------------------------------------------------------- ferc_accounts ------------------------------------------------------------------------------- + +Account numbers from the FERC Uniform System of Accounts for Electric Plant, +which is defined in Code of Federal Regulations (CFR) Title 18, Chapter I, +Subchapter C, Part 101. (See e.g. +https://www.law.cornell.edu/cfr/text/18/part-101). `Browse or query this table in Datasette. `__ .. list-table:: @@ -230,6 +252,10 @@ ferc_accounts ------------------------------------------------------------------------------- ferc_depreciation_lines ------------------------------------------------------------------------------- + +PUDL assigned FERC Form 1 line identifiers and long descriptions from FERC +Form 1 page 219, Accumulated Provision for Depreciation of Electric Utility +Plant (Account 108). `Browse or query this table in Datasette. `__ .. list-table:: @@ -251,6 +277,11 @@ ferc_depreciation_lines ------------------------------------------------------------------------------- fuel_ferc1 ------------------------------------------------------------------------------- + +Annual fuel cost and quantity for steam plants with a capacity of 25+ MW, +internal combustion and gas-turbine plants of 10+ MW, and all nuclear plants. +As reported on page 402 of FERC Form 1 and extracted from the f1_fuel table in +FERC's FoxPro Database. `Browse or query this table in Datasette. `__ .. 
list-table:: @@ -299,6 +330,8 @@ fuel_ferc1 ------------------------------------------------------------------------------- fuel_receipts_costs_eia923 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -383,6 +416,8 @@ fuel_receipts_costs_eia923 ------------------------------------------------------------------------------- fuel_type_aer_eia923 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -404,6 +439,8 @@ fuel_type_aer_eia923 ------------------------------------------------------------------------------- fuel_type_eia923 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -425,6 +462,8 @@ fuel_type_eia923 ------------------------------------------------------------------------------- generation_eia923 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -452,6 +491,8 @@ generation_eia923 ------------------------------------------------------------------------------- generation_fuel_eia923 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -490,7 +531,7 @@ generation_fuel_eia923 - Net generation, year to date in megawatthours (MWh). This is total electrical output net of station service. In the case of combined heat and power plants, this value is intended to include internal consumption of electricity for the purposes of a production process, as well as power put on the grid. * - nuclear_unit_id - integer - - For nuclear plants only, the unit number .One digit numeric. 
Nuclear plants are the only type of plants for which data are shown explicitly at the generating unit level. + - For nuclear plants only. This unit ID appears to correspond directly to the generator ID, as reported in the EIA-860. Nuclear plants are the only type of plants for which data are shown explicitly at the generating unit level. Note that nuclear plants only report their fuel consumption and net generation in the generation_fuel_eia923 table and not elsewhere. * - plant_id_eia - integer - The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration. @@ -506,6 +547,8 @@ generation_fuel_eia923 ------------------------------------------------------------------------------- generators_eia860 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -716,6 +759,8 @@ generators_eia860 ------------------------------------------------------------------------------- generators_entity_eia ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -794,6 +839,8 @@ generators_entity_eia ------------------------------------------------------------------------------- hourly_emissions_epacems ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -863,6 +910,8 @@ hourly_emissions_epacems ------------------------------------------------------------------------------- ownership_eia860 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. 
list-table:: @@ -911,6 +960,17 @@ ownership_eia860 ------------------------------------------------------------------------------- plant_in_service_ferc1 ------------------------------------------------------------------------------- + +Balances and changes to FERC Electric Plant in Service accounts, as reported +on FERC Form 1. Data originally from the f1_plant_in_srvce table in FERC's +FoxPro database. Account numbers correspond to the FERC Uniform System of +Accounts for Electric Plant, which is defined in Code of Federal Regulations +(CFR) Title 18, Chapter I, Subchapter C, Part 101. (See e.g. +https://www.law.cornell.edu/cfr/text/18/part-101). Each FERC respondent +reports starting and ending balances for each account annually. Balances are +organization wide, and are not broken down on a per-plant basis. End of year +balance should equal beginning year balance plus the sum of additions, +retirements, adjustments, and transfers. `Browse or query this table in Datasette. `__ .. list-table:: @@ -1217,6 +1277,8 @@ plant_in_service_ferc1 ------------------------------------------------------------------------------- plant_unit_epa ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -1238,6 +1300,8 @@ plant_unit_epa ------------------------------------------------------------------------------- plants_eia ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -1262,6 +1326,8 @@ plants_eia ------------------------------------------------------------------------------- plants_eia860 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. 
list-table:: @@ -1352,6 +1418,8 @@ plants_eia860 ------------------------------------------------------------------------------- plants_entity_eia ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -1436,6 +1504,10 @@ plants_entity_eia ------------------------------------------------------------------------------- plants_ferc1 ------------------------------------------------------------------------------- + +Name, utility, and PUDL id for steam plants with a capacity of 25,000+ kW, +internal combustion and gas-turbine plants of 10,000+ kW, and all nuclear +plants. `Browse or query this table in Datasette. `__ .. list-table:: @@ -1460,6 +1532,10 @@ plants_ferc1 ------------------------------------------------------------------------------- plants_hydro_ferc1 ------------------------------------------------------------------------------- + +Generating plant statistics for hydroelectric plants with an installed +nameplate capacity of 10+ MW. As reported on FERC Form 1, pages 406-407 and +extracted from the f1_hydro table in FERC's FoxPro database. `Browse or query this table in Datasette. `__ .. list-table:: @@ -1586,6 +1662,13 @@ plants_hydro_ferc1 ------------------------------------------------------------------------------- plants_pudl ------------------------------------------------------------------------------- + +Home table for PUDL assigned plant IDs. These IDs are manually generated each +year when new FERC and EIA reporting is integrated, and any newly identified +plants are added to the list with a new ID. Each ID maps to a power plant +which is reported in at least one FERC or EIA data set. This table is read in +from a spreadsheet stored in the PUDL repository: +src/pudl/package_data/glue/mapping_eia923_ferc1.xlsx `Browse or query this table in Datasette. `__ .. 
list-table:: @@ -1607,6 +1690,10 @@ plants_pudl ------------------------------------------------------------------------------- plants_pumped_storage_ferc1 ------------------------------------------------------------------------------- + +Generating plant statistics for hydroelectric pumped storage plants with an +installed nameplate capacity of 10+ MW. As reported on page 408 of FERC Form 1 +and extracted from the f1_pumped_storage table in FERC's FoxPro Database. `Browse or query this table in Datasette. `__ .. list-table:: @@ -1745,6 +1832,12 @@ plants_pumped_storage_ferc1 ------------------------------------------------------------------------------- plants_small_ferc1 ------------------------------------------------------------------------------- + +Generating plant statistics for steam plants with less than 25 MW installed +nameplate capacity and internal combustion plants, gas-turbine plants, +conventional hydro plants, and pumped storage plants with less than 10 MW +installed nameplate capacity. As reported on FERC Form 1 pages 410-411, and +extracted from the FERC FoxPro database table f1_gnrt_plant. `Browse or query this table in Datasette. `__ .. list-table:: @@ -1814,6 +1907,11 @@ plants_small_ferc1 ------------------------------------------------------------------------------- plants_steam_ferc1 ------------------------------------------------------------------------------- + +Generating plant statistics for steam plants with a capacity of 25+ MW, +internal combustion and gas-turbine plants of 10+ MW, and all nuclear plants. +As reported on page 402 of FERC Form 1 and extracted from the f1_steam +table in FERC's FoxPro Database. `Browse or query this table in Datasette. `__ .. list-table:: @@ -1949,6 +2047,8 @@ plants_steam_ferc1 ------------------------------------------------------------------------------- prime_movers_eia923 ------------------------------------------------------------------------------- + +Pending description. 
`Browse or query this table in Datasette. `__ .. list-table:: @@ -1970,6 +2070,11 @@ prime_movers_eia923 ------------------------------------------------------------------------------- purchased_power_ferc1 ------------------------------------------------------------------------------- + +Purchased Power (Account 555) including power exchanges (i.e. transactions +involving a balancing of debits and credits for energy, capacity, etc.) and +any settlements for imbalanced exchanges. Reported on pages 326-327 of FERC +Form 1. Extracted from the f1_purchased_pwr table in FERC's FoxPro database. `Browse or query this table in Datasette. `__ .. list-table:: @@ -2033,6 +2138,8 @@ purchased_power_ferc1 ------------------------------------------------------------------------------- transport_modes_eia923 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -2054,6 +2161,8 @@ transport_modes_eia923 ------------------------------------------------------------------------------- utilities_eia ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -2078,6 +2187,8 @@ utilities_eia ------------------------------------------------------------------------------- utilities_eia860 ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. list-table:: @@ -2165,6 +2276,8 @@ utilities_eia860 ------------------------------------------------------------------------------- utilities_entity_eia ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. 
list-table:: @@ -2186,6 +2299,13 @@ utilities_entity_eia ------------------------------------------------------------------------------- utilities_ferc1 ------------------------------------------------------------------------------- + +This table maps the manually assigned PUDL utility ID to a FERC respondent ID, +enabling a connection between the FERC and EIA data sets. It also stores the +utility name associated with the FERC respondent ID. Those values originate in +the f1_respondent_id table in FERC's FoxPro database, which is stored in a +file called F1_1.DBF. This table is generated from a spreadsheet stored in the +PUDL repository: src/pudl/package_data/glue/mapping_eia923_ferc1.xlsx `Browse or query this table in Datasette. `__ .. list-table:: @@ -2210,6 +2330,13 @@ utilities_ferc1 ------------------------------------------------------------------------------- utilities_pudl ------------------------------------------------------------------------------- + +Home table for PUDL assigned utility IDs. These IDs are manually generated +each year when new FERC and EIA reporting is integrated, and any newly found +utilities are added to the list with a new ID. Each ID maps to a power plant +owning or operating entity which is reported in at least one FERC or EIA data +set. This table is read in from a spreadsheet stored in the PUDL repository: +src/pudl/package_data/glue/mapping_eia923_ferc1.xlsx `Browse or query this table in Datasette. `__ .. list-table:: @@ -2231,6 +2358,8 @@ utilities_pudl ------------------------------------------------------------------------------- utility_plant_assn ------------------------------------------------------------------------------- + +Pending description. `Browse or query this table in Datasette. `__ .. 
list-table:: diff --git a/docs/data_sources/eia860.rst b/docs/data_sources/eia860.rst index e02c6d176c..f3164f8a1f 100644 --- a/docs/data_sources/eia860.rst +++ b/docs/data_sources/eia860.rst @@ -2,21 +2,34 @@ EIA Form 860 =============================================================================== -=================== =========================================================== -Source URL https://www.eia.gov/electricity/data/eia860/ -Source Description | The status of existing electric generating plants and - | associated equipment in the United States, and those - | scheduled for initial commercial operation within 10 - | years of the filing. -Respondents Utilities -Source Format Microsoft Excel (.xls/.xlsx) -Source Years 2001-2019 -Size (Download) 413.4 MB -PUDL Code ``eia860`` -Years Liberated 2004-2019 -Records Liberated ~1 million -Issues `open EIA 860 issues `__ -=================== =========================================================== +.. list-table:: + :widths: auto + :header-rows: 0 + :stub-columns: 1 + + * - Source URL + - https://www.eia.gov/electricity/data/eia860/ + * - Source Description + - The status of existing electric generating plants and associated equipment in + the United States and those scheduled for initial commercial operation within 10 + years of the filing. + * - Respondents + - Utilities + * - Source Format + - Microsoft Excel (.xls/.xlsx) + * - Source Years + - 2001-2019 + * - Size (Download) + - 413.4 MB + * - PUDL Code + - ``eia860`` + * - Years Liberated + - 2004-2019 + * - Records Liberated + - ~1 million + * - Issues + - `Open EIA 860 issues `__ + Background ^^^^^^^^^^ @@ -26,6 +39,10 @@ and planned entities with one or more megawatt of capacity. The form also contai information regarding environmental control equipment and construction cost data from 2013-2018. 
+* :download:`EIA-860 Instructions (PDF, to 2013-10-31) + ` +* :download:`EIA-860 Instructions (PDF, to 2017-05-31) + ` * :download:`EIA-860 Instructions (PDF, to 2020-03-31) ` * :download:`EIA-860 Instructions (PDF, to 2023-05-31) @@ -98,22 +115,44 @@ PUDL Data Tables ^^^^^^^^^^^^^^^^ We've segmented the processed EIA-860 data into the following normalized data tables. -Clicking on the links will show you the names and descriptions of the fields available -in each table. - -* :ref:`generators_eia860` -* :ref:`ownership_eia860` -* :ref:`boiler_generator_assn_eia860` -* :ref:`plants_eia860` -* :ref:`utilities_eia860` +Clicking on the links will show you a description of the table as well as +the names and descriptions of each of its fields. + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Data Dictionary + - Browse Online + * - :ref:`generators_eia860` + - https://data.catalyst.coop/pudl/generators_eia860 + * - :ref:`ownership_eia860` + - https://data.catalyst.coop/pudl/ownership_eia860 + * - :ref:`boiler_generator_assn_eia860` + - https://data.catalyst.coop/pudl/boiler_generator_assn_eia860 + * - :ref:`plants_eia860` + - https://data.catalyst.coop/pudl/plants_eia860 + * - :ref:`utilities_eia860` + - https://data.catalyst.coop/pudl/utilities_eia860 We've also created the following entity tables modeled after EIA data collected from -multiple tables +multiple tables. + +.. 
list-table:: + :header-rows: 1 + :widths: auto + + * - Data Dictionary + - Browse Online + * - :ref:`boilers_entity_eia` + - https://data.catalyst.coop/pudl/boilers_entity_eia + * - :ref:`generators_entity_eia` + - https://data.catalyst.coop/pudl/generators_entity_eia + * - :ref:`plants_entity_eia` + - https://data.catalyst.coop/pudl/plants_entity_eia + * - :ref:`utilities_entity_eia` + - https://data.catalyst.coop/pudl/utilities_entity_eia -* :ref:`boilers_entity_eia` -* :ref:`generators_entity_eia` -* :ref:`plants_entity_eia` -* :ref:`utilities_entity_eia` PUDL Data Transformations ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/data_sources/eia923.rst b/docs/data_sources/eia923.rst index df0083395e..ca2422f6f6 100644 --- a/docs/data_sources/eia923.rst +++ b/docs/data_sources/eia923.rst @@ -2,20 +2,33 @@ EIA Form 923 =============================================================================== -=================== =========================================================== -Source URL https://www.eia.gov/electricity/data/eia923/ -Source Description Generation, consumption, stocks, receipts -Respondents | Electric, CHP plants, and sometimes fuel transfer terminals with - | either 1MW+ or the ability to receive and deliver power to the - | grid. -Source Format Microsoft Excel (.xls/.xlsx) -Source Years 2001-2019 -Size (Download) 243.3 MB -PUDL Code ``eia923`` -Years Liberated 2009-2019 -Records Liberated ~3.6 million -Issues `Open EIA 923 issues `__ -=================== =========================================================== +.. list-table:: + :widths: auto + :header-rows: 0 + :stub-columns: 1 + + * - Source URL + - https://www.eia.gov/electricity/data/eia923/ + * - Source Description + - Generation, consumption, stocks, receipts + * - Respondents + - Electric, CHP plants, and sometimes fuel transfer terminals with + either 1MW+ or the ability to receive and deliver power to the grid. 
+ * - Source Format + - Microsoft Excel (.xls/.xlsx) + * - Source Years + - 2001-2019 + * - Size (Download) + - 243.3 MB + * - PUDL Code + - ``eia923`` + * - Years Liberated + - 2001-2019 + * - Records Liberated + - ~3.6 million + * - Issues + - `Open EIA 923 issues `__ + Background ^^^^^^^^^^ @@ -30,6 +43,12 @@ over the years, beginning as an environmental add-on in 2007 and ultimately ecli the information previously recorded in EIA-906, EIA-920, FERC 423, and EIA-423 by 2008. +* :download:`EIA-923 Instructions (PDF, to 2013-10-31) + ` +* :download:`EIA-923 Instructions (PDF, to 2015-12-31) + ` +* :download:`EIA-923 Instructions (PDF, to 2017-05-31) + ` * :download:`EIA-923 Instructions (PDF, to 2020-03-31) ` * :download:`EIA-923 Instructions (PDF, to 2023-05-31) @@ -108,28 +127,52 @@ rolled in with the state/fuel aggregates values reported under the plant id 9999 PUDL Database Tables ^^^^^^^^^^^^^^^^^^^^ We've segmented the processed EIA-923 data into the following normalized data tables. -Clicking on the links will show you the names and descriptions of the fields available -in each table. +Clicking on the links will show you a description of the table as well as the names and +descriptions of each of its fields. EIA-923 Data Tables ------------------- -These tables contain the bulk data reported in the EIA-923: +These tables contain the bulk data reported in the EIA-923. + +.. 
list-table:: + :header-rows: 1 + :widths: auto + + * - Data Dictionary + - Browse Online + * - :ref:`boiler_fuel_eia923` + - https://data.catalyst.coop/pudl/boiler_fuel_eia923 + * - :ref:`coalmine_eia923` + - https://data.catalyst.coop/pudl/coalmine_eia923 + * - :ref:`fuel_receipts_costs_eia923` + - https://data.catalyst.coop/pudl/fuel_receipts_costs_eia923 + * - :ref:`generation_eia923` + - https://data.catalyst.coop/pudl/generation_eia923 + * - :ref:`generation_fuel_eia923` + - https://data.catalyst.coop/pudl/generation_fuel_eia923 -* :ref:`boiler_fuel_eia923` -* :ref:`coalmine_eia923` -* :ref:`fuel_receipts_costs_eia923` -* :ref:`generation_eia923` -* :ref:`generation_fuel_eia923` EIA-923 Structural Tables ------------------------- -These tables define various codes and abbreviations more fully: +These tables define various codes and abbreviations more fully. + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Data Dictionary + - Browse Online + * - :ref:`energy_source_eia923` + - https://data.catalyst.coop/pudl/energy_source_eia923 + * - :ref:`fuel_type_aer_eia923` + - https://data.catalyst.coop/pudl/fuel_type_aer_eia923 + * - :ref:`fuel_type_eia923` + - https://data.catalyst.coop/pudl/fuel_type_eia923 + * - :ref:`prime_movers_eia923` + - https://data.catalyst.coop/pudl/prime_movers_eia923 + * - :ref:`transport_modes_eia923` + - https://data.catalyst.coop/pudl/transport_modes_eia923 -* :ref:`energy_source_eia923` -* :ref:`fuel_type_aer_eia923` -* :ref:`fuel_type_eia923` -* :ref:`prime_movers_eia923` -* :ref:`transport_modes_eia923` PUDL Data Transformations ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/data_sources/epacems.rst b/docs/data_sources/epacems.rst index 7822d01839..c7f5bbea89 100644 --- a/docs/data_sources/epacems.rst +++ b/docs/data_sources/epacems.rst @@ -2,18 +2,32 @@ EPA CEMS Hourly =============================================================================== -=================== 
=========================================================== -Source URL ftp://newftp.epa.gov/dmdnload/emissions/hourly/monthly -Source Description Hourly CO2, SO2, NOx emissions and gross load -Respondents Coal and high-sulfur fueled plants -Source Format Comma Separated Value (.csv) -Source Years 1995-2019 -Size (Download) 8.7 GB -PUDL Code ``epacems`` -Years Liberated 1995-2019 -Records Liberated ~1 billion -Issues `Open EPA CEMS issues `__ -=================== =========================================================== +.. list-table:: + :widths: auto + :header-rows: 0 + :stub-columns: 1 + + * - Source URL + - ftp://newftp.epa.gov/dmdnload/emissions/hourly/monthly + * - Source Description + - Hourly CO2, SO2, NOx emissions and gross load + * - Respondents + - Coal and high-sulfur fueled plants + * - Source Format + - Comma Separated Value (.csv) + * - Source Years + - 1995-2020 + * - Size (Download) + - 8.7 GB + * - PUDL Code + - ``epacems`` + * - Years Liberated + - 1995-2020 + * - Records Liberated + - ~1 billion + * - Issues + - `Open EPA CEMS issues `__ + Background ^^^^^^^^^^ @@ -78,10 +92,18 @@ on GitHub for pointers on how to access this big dataset efficiently using :mod: PUDL Data Tables ^^^^^^^^^^^^^^^^ -Clicking on the links will show you the names and descriptions of the fields available -in the CEMS table. +Clicking on the links will show you a description of the table as well as the names and +descriptions of each of its fields. + +.. 
list-table:: + :header-rows: 1 + :widths: auto + + * - Data Dictionary + - Browse Online + * - :ref:`hourly_emissions_epacems` + - Not Available via Datasette -* :ref:`hourly_emissions_epacems` PUDL Data Transformations ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/data_sources/ferc1.rst b/docs/data_sources/ferc1.rst index f539a01c0f..46c2bcda0e 100644 --- a/docs/data_sources/ferc1.rst +++ b/docs/data_sources/ferc1.rst @@ -2,19 +2,33 @@ FERC Form 1 =============================================================================== -=================== =========================================================== -Source URL https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-1-electric-utility-annual -Source Description Financial and operational information from electric utilities, - licensees and others entities subject to FERC jurisdiction. -Respondents Major electric utilities and licensees -Source Format FoxPro Database (.DBC/.DBF) -Source Years 1994-2019 -Size (Download) 1.3 GB -PUDL Code ``ferc1`` -Years Liberated 1994-2019 -Records Liberated ~12 million (116 raw tables), ~316,000 (7 clean tables) -Issues `Open FERC Form 1 issues `__ -=================== =========================================================== +.. list-table:: + :widths: auto + :header-rows: 0 + :stub-columns: 1 + + * - Source URL + - https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-1-electric-utility-annual + * - Source Description + - Financial and operational information from electric utilities, + licensees and others entities subject to FERC jurisdiction. + * - Respondents + - Major electric utilities and licensees. 
+ * - Source Format + - FoxPro Database (.DBC/.DBF) + * - Source Years + - 1994-2019 + * - Size (Download) + - 1.3 GB + * - PUDL Code + - ``ferc1`` + * - Years Liberated + - 1994-2019 + * - Records Liberated + - ~12 million (116 raw tables), ~316,000 (7 clean tables) + * - Issues + - `Open FERC Form 1 issues `__ + Background ^^^^^^^^^^ @@ -23,6 +37,18 @@ The FERC Form 1, otherwise known as the **Electric Utility Annual Report**, cont financial and operating data for major utilities and licensees. Much of it is not publicly available anywhere else. +* :download:`A diagram of the 2015 FERC Form 1 Database (PDF) + ` +* :download:`Blank FERC Form 1 (PDF, to 2005-03-31) ` +* :download:`Blank FERC Form 1 (PDF, to 2007-06-30) ` +* :download:`Blank FERC Form 1 (PDF, to 2008-07-31) ` +* :download:`Blank FERC Form 1 (PDF, to 2011-12-31) ` +* :download:`Blank FERC Form 1 (PDF, to 2014-12-31) ` +* :download:`Blank FERC Form 1 (PDF, to 2016-11-30) ` +* :download:`Blank FERC Form 1 (PDF, to 2019-12-31) ` +* :download:`Blank FERC Form 1 (PDF, to 2022-11-30) ` + + Who is required to fill out the form? ------------------------------------- @@ -47,6 +73,7 @@ focused on the tables pertaining to power plants, their capital & operating expenses, and fuel consumption; however, we have the tools required to pull just about any other table in as well. + What does the original data look like? -------------------------------------- @@ -65,8 +92,9 @@ discontinued. FERC's continued use of this database format creates a significant barrier to data access. The FERC 1 database is poorly normalized and the data itself does not appear -to be subject to much quality control. For more detaild context and -documentation on a table-by-table basis, see :doc:`ferc1_db_notes` +to be subject to much quality control. For more detailed context and +documentation on a table-by-table basis, look at +:doc:`/data_dictionaries/ferc1_db`. 
Notable Irregularities ^^^^^^^^^^^^^^^^^^^^^^ @@ -105,8 +133,8 @@ PUDL Data Tables ^^^^^^^^^^^^^^^^ We've segmented the processed FERC Form 1 data into the following normalized data -tables. Clicking on the links will show you the names and descriptions of the fields -available in each table. +tables. Clicking on the links will show you a description of the table as well as +the names and descriptions of each of its fields. .. list-table:: :header-rows: 1 diff --git a/docs/data_sources/index.rst b/docs/data_sources/index.rst index 8b493a840a..94d6df766f 100644 --- a/docs/data_sources/index.rst +++ b/docs/data_sources/index.rst @@ -11,10 +11,9 @@ Data Sources eia923 epacems ferc1 - ferc1_db_notes .. toctree:: :caption: Work in Progress & Future Datasets - :maxdepth: 3 + :maxdepth: 1 wip_future diff --git a/docs/data_sources/wip_future.rst b/docs/data_sources/wip_future.rst index 6306ce4350..a69a394306 100644 --- a/docs/data_sources/wip_future.rst +++ b/docs/data_sources/wip_future.rst @@ -43,6 +43,12 @@ capacity, sales, revenues, programs, and more. Right now we've got all of 861 integrated and are building out our testing and data validation before publishing the data officially. +* :download:`EIA-861 Instructions (PDF, to 2013-10-31) + ` +* :download:`EIA-861 Instructions (PDF, to 2015-12-31) + ` +* :download:`EIA-861 Instructions (PDF, to 2017-05-31) + ` * :download:`EIA-861 Instructions (PDF, to 2020-03-31) ` * :download:`EIA-861 Instructions (PDF, to 2023-05-31) diff --git a/docs/dev/build_docs.rst b/docs/dev/build_docs.rst index 1d32b60c12..fb9a915a50 100644 --- a/docs/dev/build_docs.rst +++ b/docs/dev/build_docs.rst @@ -43,6 +43,6 @@ documentation in your text editor with appropriate plugins. If you create a new module, the corresponding documentation file will also need to be checked in to version control. 
- Similarly the :doc:`/data_dictionary` is generated dynamically by the - :mod:`pudl.convert.datapkg_to_rst` script that gets run by Tox when it + Similarly the :doc:`../data_dictionaries/pudl_db` is generated dynamically + by the :mod:`pudl.convert.datapkg_to_rst` script that gets run by Tox when it builds the docs. diff --git a/docs/dev/run_the_etl.rst b/docs/dev/run_the_etl.rst index 14dd7301dd..b0266888a5 100644 --- a/docs/dev/run_the_etl.rst +++ b/docs/dev/run_the_etl.rst @@ -46,7 +46,8 @@ we do in our :doc:`software integration tests `. $ datapkg_to_sqlite \ datapkg/pudl-fast/ferc1/datapackage.json \ datapkg/pudl-fast/epacems-eia/datapackage.json - $ epacems_to_parquet datapkg/pudl-fast/epacems-eia/datapackage.json + $ epacems_to_parquet --years 2019 --states ID -- \ + datapkg/pudl-fast/epacems-eia/datapackage.json The Full ETL ------------ @@ -61,7 +62,7 @@ for one state (Idaho!) and takes around 20 minutes to process. $ pudl_etl settings/etl_full.yml $ datapkg_to_sqlite datapkg/pudl-full/ferc1/datapackage.json \ datapkg/pudl-full/eia/datapackage.json - $ epacems_to_parquet datapkg/pudl-full/epacems-eia/datapackage.json + $ epacems_to_parquet --states ID -- datapkg/pudl-full/epacems-eia/datapackage.json Additional Notes ---------------- diff --git a/docs/index.rst b/docs/index.rst index 3974fd947e..c48d88aa9d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,9 +12,10 @@ The Public Utility Data Liberation Project intro data_access data_sources/index - Data Dictionary + data_dictionaries/index Contributing dev/index Licensing Code of Conduct + Release Notes Module Index diff --git a/docs/intro.rst b/docs/intro.rst index 12b5c1a4a8..d383d19f95 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -142,8 +142,9 @@ database tables. These are written out to ("loaded" into) platform independent ` data packages `__ where the data is stored as CSV files and the metadata is stored as JSON. 
These static, text-based output formats are archive-friendly and can be used to populate a database -or read with Python, R, and many other tools. See the :doc:`data_dictionary` page for a -list of the normalized database tables and their contents. +or read with Python, R, and many other tools. See the +:doc:`data_dictionaries/pudl_db` page for a list of the normalized database +tables and their contents. .. note:: diff --git a/docs/release_notes.rst b/docs/release_notes.rst new file mode 100644 index 0000000000..8546386f26 --- /dev/null +++ b/docs/release_notes.rst @@ -0,0 +1,230 @@ +======================================================================================= +PUDL Release Notes +======================================================================================= + +--------------------------------------------------------------------------------------- +0.4.0 (2021-07-XX) +--------------------------------------------------------------------------------------- +This is a ridiculously large update including more than a year and a half's +worth of work. + +New Data Coverage +^^^^^^^^^^^^^^^^^ + +* :doc:`data_sources/eia860` for 2004-2008 + 2019, plus eia860m through 2020. +* :doc:`data_sources/eia923` for 2001-2008 + 2019 +* :doc:`data_sources/epacems` for 2019-2020 +* :doc:`data_sources/ferc1` for 2019 +* :ref:`US Census Demographic Profile (DP1) ` for 2010 +* :ref:`data-ferc714` for 2006-2019 (experimental) +* :ref:`data-eia861` for 2001-2019 (experimental) + +Documentation & Data Accessibility +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +We've updated and (hopefully) clarified the documentation, and no longer expect +most users to perform the data processing on their own. Instead, we are offering +several methods of directly accessing already processed data: + +* Processed data archives on Zenodo that include a Docker container preserving + the required software environment for working with the data. 
+* `A repository of PUDL example notebooks `__ +* `A JupyterHub instance `__ + hosted in collaboration with `2i2c `__ +* Browsable database access via `Datasette `__ at + https://data.catalyst.coop + +Users who still want to run the ETL themselves will need to +:doc:`set up the PUDL development environment `. + +Data Cleaning & Integration +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* We now inject placeholder utilities in the cloned FERC Form 1 database when + respondent IDs appear in the data tables, but not in the respondent table. + This addresses a bunch of unsatisfied foreign key constraints in the original + databases published by FERC. +* We're doing much more software testing and data validation, and so hopefully + we're catching more issues early on. + +Hourly Electricity Demand and Historical Utility Territories +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +With support from `GridLab `__ and in collaboration with +researchers at Berkeley's `Center for Environmental Public Policy +`__, we did a bunch +of work on spatially attributing hourly historical electricity demand. This work +was largely done by :user:`ezwelty` and :user:`yashkumar1803` and included: + +* Semi-programmatic compilation of historical utility and balancing authority + service territory geometries based on the counties associated with utilities, + and the utilities associated with balancing authorities in the EIA 861 + (2001-2019). See e.g. :pr:`670` but also many others. +* A method for spatially allocating hourly electricity demand from FERC 714 to + US states based on the overlapping historical utility service territories + described above. See :pr:`741` +* A fast timeseries outlier detection routine for cleaning up the FERC 714 + hourly data using correlations between the time series reported by all of the + different entities. 
See :pr:`871` + +Net Generation and Fuel Consumption for All Generators +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +We have developed an experimental methodology to produce net generation and +fuel consumption for all generators. The process has known issues and is being +actively developed. See :pr:`989` + +Net electricity generation and fuel consumption are reported in multiple ways in +the EIA 923. The :ref:`generation_fuel_eia923` table reports both generation and +fuel consumption, and breaks them down by plant, prime mover, and fuel. In +parallel, the :ref:`generation_eia923` table reports generation by generator, +and the :ref:`boiler_fuel_eia923` table reports fuel consumption by boiler. + +The :ref:`generation_fuel_eia923` table is more complete, but the +:ref:`generation_eia923` + :ref:`boiler_fuel_eia923` tables are more granular. +The :ref:`generation_eia923` table includes only ~55% of the total MWhs reported +in the :ref:`generation_fuel_eia923` table. + +The :mod:`pudl.analysis.allocate_net_gen` module estimates the net electricity +generation and fuel consumption attributable to individual generators based on +the more expansive reporting of the data in the :ref:`generation_fuel_eia923` +table. + +Data Management and Archiving +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* We now use a series of web scrapers to collect snapshots of the raw input data + that is processed by PUDL. These original data are archived as + `Frictionless Data Packages `__ + on `Zenodo `__, so that they can be accessed reproducibly + and programmatically via a REST API. This addresses the problems we were + having with the v0.3.x releases, in which the original data on the agency + websites was liable to be modified long after its "final" release, rendering + it incompatible with our software. These scrapers and the Zenodo archiving + scripts can be found in our + `pudl-scrapers `__ and + `pudl-zenodo-storage `__ + repositories. 
The archives themselves can be found within the + `Catalyst Cooperative community on Zenodo `__ +* There's an experimental caching system that allows these Zenodo archives to + work as long-term "cold storage" for citation and reproducibility, with + cloud object storage acting as a much faster way to access the same data for + day to day non-local use, implemented by :user:`rousik` +* We've decided to shift to producing a combination of relational databases + (SQLite files) and columnar data stores (Apache Parquet files) as the primary + outputs of PUDL. `Tabular Data Packages `__ + didn't end up serving either database or spreadsheet users very well. The CSV + files were often too large to access via spreadsheets, and users missed out on + the relationships between data tables. Needing to separately load the data + packages into SQLite and Parquet was a hassle and generated a lot of overly + complicated and fragile code. + +Known Issues +^^^^^^^^^^^^ + +* The EIA 861 and FERC 714 data are not yet integrated into the SQLite database + outputs, because we need to overhaul our entity resolution process to + accommodate them in the database structure. That work is ongoing, see + :issue:`639` +* The EIA 860 and EIA 923 data don't cover exactly the same range of years. EIA + 860 only goes back to 2004, while EIA 923 goes back to 2001. This is because + the pre-2004 EIA 860 data is stored in the DBF file format, and we need to + update our extraction code to deal with the different format. This means some + analyses that require both EIA 860 and EIA 923 data (like the calculation of + heat rates) can only be performed as far back as 2004 at the moment. See + :issue:`848` +* There are 387 EIA utilities and 228 EIA plants which appear in the EIA 923, + but which haven't yet been assigned PUDL IDs and associated with the + corresponding utilities and plants reported in the FERC Form 1. These entities + show up in the 2001-2008 EIA 923 data that was just integrated. 
These older + plants and utilities can't yet be used in conjunction with FERC data. When the + EIA 860 data for 2001-2003 has been integrated, we will finish this manual + ID assignment process. See :issue:`848,1069` +* 52 of the algorithmically assigned ``plant_id_ferc1`` values found in the + ``plants_steam_ferc1`` table are currently associated with more than one + ``plant_id_pudl`` value (99 PUDL plant IDs are involved), indicating either + that the algorithm is making poor assignments, or that the manually assigned + ``plant_id_pudl`` values are incorrect. This is out of several thousand + distinct ``plant_id_ferc1`` values. See :issue:`954` +* The county FIPS codes associated with coal mines reported in the Fuel Receipts and + Costs table are being treated inconsistently in terms of their data types, especially + in the output functions, so they are currently being output as floating point numbers + that have been cast to strings, rather than zero-padded integers that are strings. See + :issue:`1119` + +--------------------------------------------------------------------------------------- +0.3.2 (2020-02-17) +--------------------------------------------------------------------------------------- +The primary changes in this release: + +* The 2009-2010 data for EIA 860 have been integrated, including updates + to the data validation test cases. +* Output tables are more uniform and less restrictive in what they + include, no longer requiring PUDL Plant & Utility IDs in some tables. This + release was used to compile v1.1.0 of the PUDL Data Release, which is archived + at Zenodo under this DOI: https://doi.org/10.5281/zenodo.3672068 + + With this release, the EIA 860 & 923 data now (finally!) cover the same span + of time. We do not anticipate integrating any older EIA 860 or 923 data at + this time. 
+ + +--------------------------------------------------------------------------------------- +0.3.1 (2020-02-05) +--------------------------------------------------------------------------------------- +A couple of minor bugs were found in the preparation of the first PUDL data +release: + +* No maximum version of Python was being specified in setup.py. PUDL currently + only works on Python 3.7, not 3.8. + +* ``epacems_to_parquet`` conversion script was erroneously attempting to + verify the availability of raw input data files, despite the fact that it now + relies on the packaged post-ETL epacems data. Didn't catch this before since + it was always being run in a context where the original data was lying + around... but that's not the case when someone just downloads the released + data packages and tries to load them. + +--------------------------------------------------------------------------------------- +0.3.0 (2020-01-30) +--------------------------------------------------------------------------------------- +This release is mostly about getting the infrastructure in place to do regular +data releases via Zenodo, and updating ETL with 2018 data. + +Added lots of data validation / quality assurance test cases in anticipation of +archiving data. See the pudl.validate module for more details. + +New data since v0.2.0 of PUDL: + +* EIA Form 860 for 2018 +* EIA Form 923 for 2018 +* FERC Form 1 for 1994-2003 and 2018 (select tables) + +We removed the FERC Form 1 accumulated depreciation table from PUDL because it +requires detailed row-mapping in order to be accurate across all the years. It +and many other FERC tables will be integrated soon, using new row-mapping +methods. + +Lots of new plants and utilities integrated into the PUDL ID mapping process, +for the earlier years (1994-2003). All years of FERC 1 data should be +integrated for all future ferc1 tables. 
+ +Command line interfaces of some of the ETL scripts have changed, see their help +messages for details. + +--------------------------------------------------------------------------------------- +0.2.0 (2019-09-17) +--------------------------------------------------------------------------------------- +This is the first release of PUDL to generate data packages as the canonical +output, rather than loading data into a local PostgreSQL database. The data +packages can then be used to generate a local SQLite database, without relying +on any software being installed outside of the Python requirements specified for +the catalyst.coop package. + +This change will enable easier installation of PUDL, as well as archiving and +bulk distribution of the data products in a platform independent format. + +--------------------------------------------------------------------------------------- +0.1.0 (2019-09-12) +--------------------------------------------------------------------------------------- + +This is the only release of PUDL that will be made that makes use of +PostgreSQL as the primary data product. It is provided for reference, in case +there are users relying on this setup who need access to a well defined release. 
diff --git a/notebooks/work-in-progress/better-heatrates.ipynb b/notebooks/work-in-progress/better-heatrates.ipynb new file mode 100644 index 0000000000..bbce86000b --- /dev/null +++ b/notebooks/work-in-progress/better-heatrates.ipynb @@ -0,0 +1,785 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook Preamble" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Standard libraries\n", + "import logging\n", + "import os\n", + "import pathlib\n", + "import sys\n", + "\n", + "# 3rd party libraries\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib as mpl\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import sqlalchemy as sa\n", + "\n", + "# Local libraries\n", + "import pudl\n", + "import pudl.constants as pc" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sns.set()\n", + "%matplotlib inline\n", + "mpl.rcParams['figure.figsize'] = (10,4)\n", + "mpl.rcParams['figure.dpi'] = 100\n", + "pd.options.display.max_columns = 100\n", + "pd.options.display.max_rows = 100" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "logger=logging.getLogger()\n", + "logger.setLevel(logging.INFO)\n", + "handler = logging.StreamHandler(stream=sys.stdout)\n", + "formatter = logging.Formatter('%(message)s')\n", + "handler.setFormatter(formatter)\n", + "logger.handlers = [handler]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Function Definitions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `prep_gens()`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": 
[], + "source": [ + "def prep_gens(pudl_out, prime_mover_codes):\n", + " \"\"\"\n", + " Preproccessing function to look at generators by prime mover and year.\n", + " \"\"\"\n", + " gens_eia860 = pudl_out.gens_eia860()\n", + " # Generator selection criteria\n", + " working_gens = gens_eia860.loc[gens_eia860.prime_mover_code.isin(prime_mover_codes)]\n", + " # Calculate the annual generation by generator\n", + " net_gen_by_gen = (\n", + " pudl_out.gen_eia923()\n", + " .set_index(\"report_date\")\n", + " .groupby([pd.Grouper(freq=\"AS\"), \"plant_id_eia\", \"generator_id\"])\n", + " .net_generation_mwh.sum()\n", + " .to_frame()\n", + " .reset_index()\n", + " )\n", + " # Merge annual generation by generator into the working DF\n", + " return pd.merge(\n", + " working_gens,\n", + " net_gen_by_gen,\n", + " how=\"left\",\n", + " validate=\"1:1\",\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `unit_gen_coverage()`" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def unit_gen_coverage(prepped_gens):\n", + " \"\"\"\n", + " Characterize generator-level PUDL Unit ID coverage by year.\n", + " \n", + " For each year and fossil fuel prime mover code, calculate:\n", + " \n", + " * number & fraction of generator_id values that have a unit_id_pudl\n", + " * sum and fraction of overall capacity (MW) that has a unit_id_pudl\n", + " * sum and fraction of overall generation (MWh) associated with a\n", + " unit_id_pudl in the generation_eia923 table\n", + " \n", + " \"\"\"\n", + " working_gens = prepped_gens.copy()\n", + " # A boolean column indicating whether a record has a PUDL Unit ID\n", + " working_gens.loc[:, \"has_unit_id\"] = working_gens.unit_id_pudl.notna()\n", + " \n", + " def tot_frac(df, col):\n", + " return df.loc[:, (col, True)] / df[col].sum(axis=\"columns\")\n", + " \n", + " working_gens = (\n", + " working_gens.groupby([\"report_date\", \"prime_mover_code\", 
\"has_unit_id\"])\n", + " .agg(\n", + " num_gens=pd.NamedAgg(column=\"generator_id\", aggfunc=\"size\"),\n", + " capacity_mw=pd.NamedAgg(column=\"capacity_mw\", aggfunc=\"sum\"),\n", + " net_generation_mwh=pd.NamedAgg(column=\"net_generation_mwh\", aggfunc=\"sum\"),\n", + " )\n", + " .unstack(fill_value=0)\n", + " .assign(\n", + " num_gens_fraction=lambda x: tot_frac(x, \"num_gens\"),\n", + " capacity_mw_fraction=lambda x: tot_frac(x, \"capacity_mw\"),\n", + " net_generation_mwh_fraction=lambda x: tot_frac(x, \"net_generation_mwh\"),\n", + " )\n", + " )\n", + " return working_gens" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `chp_prevalence()`" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def chp_prevalence(gens_df):\n", + " \"\"\"\n", + " Assess the prevalence of Combined Heat & Power in Generators.\n", + " \n", + " Break down generators by year and prime mover, and within each group\n", + " calculate the proportion and total quantity associated with CHP per\n", + " \n", + " * number of generators\n", + " * capacity (MW)\n", + " * generation (MWh) in the generation_eia923 table\n", + " \"\"\"\n", + " gens_working = gens_df.copy()\n", + " # A boolean column indicating whether a record is associated with CHP\n", + " gens_working.loc[:, \"ass_chap\"] = gens_working.associated_combined_heat_power.astype(bool)\n", + " \n", + " def tot_frac(df, col):\n", + " return df.loc[:, (col, True)] / df[col].sum(axis=\"columns\")\n", + " \n", + " gens_working = (\n", + " gens_working.groupby([\"report_date\", \"prime_mover_code\", \"ass_chap\"])\n", + " .agg(\n", + " num_gens=pd.NamedAgg(column=\"generator_id\", aggfunc=\"size\"),\n", + " capacity_mw=pd.NamedAgg(column=\"capacity_mw\", aggfunc=\"sum\"),\n", + " net_generation_mwh=pd.NamedAgg(column=\"net_generation_mwh\", aggfunc=\"sum\"),\n", + " )\n", + " .unstack(fill_value=0)\n", + " .assign(\n", + " num_gens_fraction=lambda x: 
tot_frac(x, \"num_gens\"),\n", + " capacity_mw_fraction=lambda x: tot_frac(x, \"capacity_mw\"),\n", + " net_generation_mwh_fraction=lambda x: tot_frac(x, \"net_generation_mwh\"),\n", + " )\n", + " )\n", + " return gens_working " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `plot_unit_ids()`" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_unit_ids(df, pm_codes):\n", + " for code in pm_codes:\n", + " data = (\n", + " df[df.prime_mover_code==code]\n", + " .groupby(\"report_date\")[\"unit_id_pudl\"]\n", + " .count()\n", + " )\n", + " plt.plot(data, label=code, linewidth=\"2\", markersize=\"4\", marker=\"o\")\n", + " plt.legend(loc=\"upper left\")\n", + " plt.ylabel(\"Generator Records with Unit IDs\")\n", + " plt.show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pull data & set constants" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Notebook Constants" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "THERMAL_PRIME_MOVERS = [ \"CT\", \"CS\", \"CA\", \"CC\", \"GT\", \"IC\", \"ST\", ]\n", + "\n", + "GENS_COLS = [\n", + " \"report_date\",\n", + " \"plant_id_eia\",\n", + " #\"plant_name_eia\",\n", + " \"unit_id_pudl\",\n", + " \"bga_source\",\n", + " \"generator_id\",\n", + " #\"capacity_mw\",\n", + " \"prime_mover_code\",\n", + " #\"energy_source_code_1\",\n", + " #\"energy_source_code_2\",\n", + " \"fuel_type_code_pudl\",\n", + " #\"technology_description\",\n", + " #\"associated_combined_heat_power\",\n", + "]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## Create PUDL output object" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'pudl_in': '/home/zane/code/catalyst/pudl-work',\n", + " 'data_dir': 
'/home/zane/code/catalyst/pudl-work/data',\n", + " 'settings_dir': '/home/zane/code/catalyst/pudl-work/settings',\n", + " 'pudl_out': '/home/zane/code/catalyst/pudl-work',\n", + " 'sqlite_dir': '/home/zane/code/catalyst/pudl-work/sqlite',\n", + " 'parquet_dir': '/home/zane/code/catalyst/pudl-work/parquet',\n", + " 'datapkg_dir': '/home/zane/code/catalyst/pudl-work/datapkg',\n", + " 'ferc1_db': 'sqlite:////home/zane/code/catalyst/pudl-work/sqlite/ferc1.sqlite',\n", + " 'pudl_db': 'sqlite:////home/zane/code/catalyst/pudl-work/sqlite/pudl.sqlite',\n", + " 'censusdp1tract_db': 'sqlite:////home/zane/code/catalyst/pudl-work/sqlite/censusdp1tract.sqlite'}" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pudl_settings = pudl.workspace.setup.get_defaults()\n", + "display(pudl_settings)\n", + "\n", + "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", + "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "\n", + "API_KEY_EIA = os.environ[\"API_KEY_EIA\"]\n", + "\n", + "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Remaining PUDL Unit ID questions:\n", + "* What is the output table describing these units going to look like?\n", + "* Should it be several different well normalized tables defining different kinds of Unit IDs?\n", + "* Should it be a single un-normalized table?\n", + "* Should we back/forward fill the technology descriptions and prime mover codes? Esp. in older years?\n", + "* Should we fill in pseudo-boiler IDs for the units that we've created, like the latter years of CCNG plants do?\n", + "* Is every boiler that we know of (in the boiler entity table) mapped to generators in the BGA table? Or are there some orphaned, unassociated boilers?\n", + "* Should we make this more extensive Unit ID assignment process optional in the generators_eia860 output table?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Selected 172517 ['CC', 'CS', 'GT', 'IC'] records lacking Unit IDs from 403834 records overall. \n", + "Selected 1891 ['ST'] records lacking Unit IDs from 403834 records overall. \n", + "Selected 3836 ST records lacking Unit IDs burning coal from 403834 records overall.\n", + "Selected 1299 ST records lacking Unit IDs burning oil from 403834 records overall.\n", + "Selected 4966 ST records lacking Unit IDs burning gas from 403834 records overall.\n", + "Selected 2256 ST records lacking Unit IDs burning waste from 403834 records overall.\n", + "CPU times: user 2min 23s, sys: 5.85 s, total: 2min 28s\n", + "Wall time: 2min 29s\n" + ] + } + ], + "source": [ + "%%time\n", + "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine)\n", + "gens_df = pudl_out.gens_eia860(unit_ids=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "assert False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Net Generation Allocation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gf_cols = [\n", + " \"plant_id_eia\",\n", + " \"report_date\",\n", + " \"energy_source_code\",\n", + " \"prime_mover_code\",\n", + " \"net_generation_mwh\",\n", + " 
\"fuel_consumed_mmbtu_for_electricity\",\n", + "]\n", + "\n", + "gens_cols = [\n", + " \"plant_id_eia\",\n", + " \"generator_id\",\n", + " \"report_date\",\n", + " \"energy_source_code_1\",\n", + " \"energy_source_code_2\",\n", + " \"energy_source_code_3\",\n", + " \"energy_source_code_4\",\n", + " \"energy_source_code_5\",\n", + " \"energy_source_code_6\",\n", + " \"capacity_mw\",\n", + " \"prime_mover_code\",\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cull fully reported units\n", + "* In some plants, all of the generators report all of their fuel consumption and electricity generation in the `generation_eia923` and `boiler_fuel_eia923` tables. If they also don't have CHP, our current heat rate calculation should fully accommodate these plants already.\n", + "* In other plants, all of the electricity generation and fuel consumption being reported in association with a particular type of prime mover (but maybe not all prime movers) are fully covered by the more granular boiler / generator reporting, and so those plant-prime combinations can be removed from the heat rate calculations based on the `generation_fuel_eia923` table data. So long as there's no CHP. This is a more general case of the bullet above, so maybe we should just do this as the first cut.\n", + "* To identify these cases, we need to aggregate net generation and fuel consumption on the basis of `unit_id_pudl` and identify cases in which all generators of any prime mover type that is involved have been included in that aggregation, on an annual basis. In these cases the net generation and fuel consumption associated with those prime mover types can be safely removed from the `generation_fuel_eia923` table, leaving only fuel and electricity that hasn't been accounted for, or is only partially accounted for." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assess New Unit ID coverage\n", + "* Run the same assessment functions as we did above\n", + "* Per generator\n", + "* Per MW installed\n", + "* Per MWh of net generation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Heat rates by Plant-Prime\n", + "* The `generation_fuel_eia923` table breaks down net generation & fuel consumption by plant and prime mover.\n", + "* Can one calculate realistic heat rates on the basis of plant-prime? Or do they always need to have units?\n", + "* Calculate the distribution of plant-prime heat rates and plot them to see what they look like.\n", + "* It seems likely that this strategy probably won't work, and we'll need to do some kind of grouping into pseudo-units." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gen_fuel = pudl_out.gf_eia923()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(gen_fuel)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert False" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Combined Heat and Power\n", + "* What fraction of generators, capacity, and generation are associated with generators that also do CHP?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gf_eia923 = pudl_out.gf_eia923()\n", + "gf_eia923[\"fuel_ratio\"] = gf_eia923.fuel_consumed_for_electricity_mmbtu / gf_eia923.fuel_consumed_mmbtu\n", + "gf_eia923[\"no_chp\"] = pd.Series(np.isclose(gf_eia923.fuel_ratio, 1.0))\n", + "plt.hist(gf_eia923.fuel_ratio, bins=20)\n", + "plt.yscale(\"log\")\n", + "plt.xlabel(\"Fraction of fuel used for electricity\")\n", + "plt.ylabel(\"Number of Records (log scale)\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gf_eia923.groupby(\"report_date\")[[\"fuel_consumed_for_electricity_mmbtu\", \"fuel_consumed_mmbtu\"]].sum().plot()\n", + "plt.ylim(0,5e9)\n", + "plt.ylabel(\"Fuel Consumed [MMBTU]\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,10))\n", + "plt.scatter(\n", + " gf_eia923.fuel_consumed_mmbtu,\n", + " gf_eia923.fuel_consumed_for_electricity_mmbtu,\n", + " s=1, alpha=0.01, color=\"black\",\n", + ")\n", + "plt.xscale(\"log\")\n", + "plt.yscale(\"log\")\n", + "plt.xlim(1e3, 1e8)\n", + "plt.ylim(1e3, 1e8)\n", + "plt.xlabel(\"Total Fuel Consumed [MMBTU]\")\n", + "plt.ylabel(\"Fuel Consumed for Electricity [MMBTU]\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "chp_summary = chp_prevalence(prepped_gens)\n", + "chp_summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assess Existing Heat Rates\n", + "* We need some way to compare different ways of calculating heat rates and choose between them.\n", + "* We want to check both for their correctness, and their completeness.\n", + "* Apparent correctness will depend on the type of generator / unit type. Need to define different expectations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "hr_by_unit = pudl_out.hr_by_unit()\n", + "hr_by_unit.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compile Net Generation\n", + "* **By generator**\n", + " * `net_generation_mwh` available from `generation_eia923` table\n", + " * Associated with `plant_id_eia` and `generator_id` columns directly.\n", + " * Can be associated with `unit_id_pudl` if the generator is part of an identified unit\n", + " * Can be associated with a `technology_description` and `prime_mover_code` based on the `generators_eia860` table.\n", + " * Can be associated with a list of energy sources based on `energy_source_N` in `generators_eia860` table.\n", + "* **By plant-prime-fuel**\n", + " * `net_generation_mwh` is available from `generation_fuel_eia923`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compile Fuel Consumption\n", + "* **By boiler**\n", + " * `total_heat_content_mmbtu` and `fuel_type_code_pudl` are available by `plant_id_eia` and `boiler_id` in `boiler_fuel_eia923`\n", + " * This value can be associated with a `unit_id_pudl` based on the BGA table.\n", + " * The heat consumed here includes fuels both for electricity and steam (direct heat) outputs.\n", + "* **By plant-prime-fuel**\n", + " * Available in `generation_fuel` and broken down separately for CHP vs. electricity." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Identify Combined Cycle Units\n", + "* Combined Cycle turbines show up in the generators table, but have no \"boiler\" so they don't end up in the boiler-generator-association\n", + "* This means they don't get assigned `unit_id_pudl` values and are often lost.\n", + "* However, they are identifiable based on `technology_description` in the generators table, and so can be associated with a plant.\n", + "* Within a given plant, it's possible to combine all the natural gas that goes into a " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Questions:\n", + "* Do all of the generators that show up in the generation table end up getting PUDL Unit IDs assigned?\n", + "* Where are the heat inputs being reported for combined cycle units? Do they really exist after 2015? What is still missing?\n", + "* Triage units / generators into: Easy, Hard, and Impossible. Work on Hard ones until diminishing returns. Assign impossible and too-hard ones the median values." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tables of Interest:\n", + "* `generation_eia923`\n", + "* `generation_fuel_eia923`\n", + "* `boiler_generator_assn_eia860`\n", + "* `generators_eia860`\n", + "* `generators_entity_eia`\n", + "* `boiler_fuel_eia923`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gens_eia860_sql = \"\"\"\n", + "SELECT report_date,\n", + " plant_id_eia,\n", + " generator_id,\n", + " capacity_mw,\n", + " energy_source_code_1,\n", + " energy_source_code_2,\n", + " energy_source_code_3,\n", + " energy_source_code_4,\n", + " energy_source_code_5,\n", + " energy_source_code_6,\n", + " fuel_type_code_pudl,\n", + " technology_description,\n", + "FROM generators_eia860\n", + "\"\"\"\n", + "\n", + "gens_entity_sql = \"\"\"\n", + "SELECT report_date,\n", + " plant_id_eia,\n", + " generator_id,\n", + " prime_mover_code,\n", + " bypass_heat_recovery,\n", + " associated_combined_heat_and_power,\n", + "FROM generators_entity_eia\n", + "\"\"\"\n", + "\n", + "gen_eia923_sql = \"\"\"\n", + "SELECT report_date,\n", + " plant_id_eia,\n", + " generator_id,\n", + " net_generation_mwh\n", + "FROM generation_eia923\n", + "\"\"\"\n", + "\n", + "gf_eia923_sql = \"\"\"\n", + "SELECT report_date,\n", + " plant_id_eia,\n", + " nuclear_unit_id,\n", + " fuel_type,\n", + " fuel_type_code_pudl,\n", + " prime_mover_code,\n", + " fuel_consumed_mmbtu,\n", + " fuel_consumed_for_electricity_mmbtu,\n", + " net_generation_mwh\n", + "FROM generation_fuel_eia923\n", + "\"\"\"\n", + "\n", + "bga_sql = \"\"\"\n", + "SELECT report_date,\n", + " plant_id_eia,\n", + " unit_id_pudl,\n", + " generator_id,\n", + " boiler_id\n", + "FROM boiler_generator_assn_eia860\n", + "\"\"\"\n", + "\n", + "bf_eia923_sql = \"\"\"\n", + "SELECT report_date,\n", + " plant_id_eia,\n", + " boiler_id,\n", + " fuel_type_code,\n", + " fuel_type_code_pudl,\n", + " fuel_consumed_units,\n", + " 
fuel_mmbtu_per_unit\n", + "FROM boiler_fuel_eia923\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compile Fuel Consumption\n", + " * Reported by boiler in `boiler_fuel_eia923` and so can be associated " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Preliminary Data Wrangling\n", + "Once all of the data is loaded and looks like it's in good shape, do any initial wrangling that's specific to this particular analysis. This should mostly make use of the higher level functions which were defined above. If this step takes a while, don't be shy about producing `logging` outputs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Analysis and Visualization\n", + "* Now that you've got the required data in a usable form, you can tell the story of your analysis through a mix of visualizations, and further data wrangling steps.\n", + "* This narrative should be readable, with figures that have titles, legends, and labeled axes as appropriate so others can understand what you're showing them.\n", + "* The code should be concise and make use of the parameters and functions which you've defined above when possible. Functions should contain comprehensible chunks of work that make sense as one step in the story of the analysis." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "coal_ax = finite_distplot(mcoe_coal, \"heat_rate_mmbtu_mwh\", max_val=20)\n", + "plt.title(\"Coal heat rate distribution\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gas_ax = finite_distplot(mcoe_gas, \"heat_rate_mmbtu_mwh\", max_val=20)\n", + "plt.title(\"Gas heat rate distribution\");" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "toc-autonumbering": true, + "toc-showmarkdowntxt": false, + "toc-showtags": false + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/work-in-progress/eia_column_changes_through_time.ipynb b/notebooks/work-in-progress/eia_column_changes_through_time.ipynb new file mode 100644 index 0000000000..2afee522af --- /dev/null +++ b/notebooks/work-in-progress/eia_column_changes_through_time.ipynb @@ -0,0 +1,1245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# EIA923 Column Changes\n", + "This notebook reimplements the excel extractor process to extract each sheet of each excel file separately. This preserves the original structure for easier comparison." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import pudl\n", + "from pudl import constants as pc\n", + "from pathlib import Path\n", + "import pandas as pd\n", + "pd.options.display.max_columns = 150\n", + "pd.options.display.max_rows = 150" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# make notebooks full width\n", + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "pudl_settings = pudl.workspace.setup.get_defaults()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "eia923_tables = pc.pudl_tables['eia923']\n", + "eia923_years = list(range(2001, 2020))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ds = pudl.workspace.datastore.Datastore(local_cache_path=Path(pudl_settings[\"data_dir\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "eia923_extractor = pudl.extract.eia923.Extractor(ds)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = {}\n", + "# Lightly altered extractor code (pudl.extract.excel.GenericExtractor.extract) to avoid concatenating prematurely\n", + "for partition in pudl.helpers.iterate_multivalue_dict(year=eia923_years):\n", + " dfs[partition['year']] = {}\n", + " for page in eia923_extractor._metadata.get_all_pages():\n", + " if eia923_extractor.excel_filename(page, **partition) == '-1':\n", + " continue\n", + " newdata = pd.read_excel(\n", + " 
eia923_extractor.load_excel_file(page, **partition),\n", + " sheet_name=eia923_extractor._metadata.get_sheet_name(\n", + " page, **partition),\n", + " skiprows=eia923_extractor._metadata.get_skiprows(page, **partition),\n", + " skipfooter=eia923_extractor._metadata.get_skipfooter(\n", + " page, **partition),\n", + " dtype=eia923_extractor.get_dtypes(page, **partition),\n", + " nrows=20\n", + " )\n", + " newdata = pudl.helpers.simplify_columns(newdata)\n", + " newdata = eia923_extractor.process_raw(newdata, page, **partition)\n", + " newdata = eia923_extractor.process_renamed(newdata, page, **partition)\n", + " dfs[partition['year']][page] = newdata" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2001 dict_keys(['generation_fuel', 'stocks'])\n", + "2002 dict_keys(['generation_fuel', 'stocks'])\n", + "2003 dict_keys(['generation_fuel', 'stocks'])\n", + "2004 dict_keys(['generation_fuel', 'stocks'])\n", + "2005 dict_keys(['generation_fuel', 'stocks'])\n", + "2006 dict_keys(['generation_fuel', 'stocks'])\n", + "2007 dict_keys(['generation_fuel', 'stocks'])\n", + "2008 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'stocks'])\n", + "2009 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'stocks'])\n", + "2010 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'stocks'])\n", + "2011 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'plant_frame', 'stocks'])\n", + "2012 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'plant_frame', 'stocks'])\n", + "2013 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'plant_frame', 'stocks'])\n", + "2014 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'plant_frame', 'stocks'])\n", + "2015 
dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'plant_frame', 'stocks'])\n", + "2016 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'plant_frame', 'stocks'])\n", + "2017 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'plant_frame', 'stocks'])\n", + "2018 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'plant_frame', 'stocks'])\n", + "2019 dict_keys(['boiler_fuel', 'fuel_receipts_costs', 'generation_fuel', 'generator', 'plant_frame', 'stocks'])\n" + ] + } + ], + "source": [ + "for k, v in dfs.items():\n", + " print(k, v.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# make dataframes of columns. One df per excel sheet, one row per year\n", + "from collections import defaultdict\n", + "col_dfs = defaultdict(list)\n", + "for page in eia923_extractor._metadata.get_all_pages():\n", + " for year in eia923_years:\n", + " try:\n", + " col_dfs[page].append(dfs[year][page].columns.to_frame().rename(columns={0: year}))\n", + " except KeyError:\n", + " continue" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "col_dfs = {k : pd.concat(v, axis=1).T for k, v in col_dfs.items()}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This shows the state of the columns for each year for each sheet. When a column is introduced (or disappears), that entry will be NaN. This particular page only has data from 2011 on." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorsector_namenaics_codecombined_heat_powerreporting_frequencynameplate_capacity_mw
2011report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorsector_namenaics_codecombined_heat_powerreporting_frequencynameplate_capacity_mw
2012report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorNaNnaics_codecombined_heat_powerreporting_frequencyNaN
2013report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorsector_namenaics_codecombined_heat_powerreporting_frequencyNaN
2014report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorNaNnaics_codecombined_heat_powerreporting_frequencyNaN
2015report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorNaNnaics_codecombined_heat_powerreporting_frequencyNaN
2016report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorNaNnaics_codecombined_heat_powerreporting_frequencyNaN
2017report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorNaNnaics_codecombined_heat_powerreporting_frequencyNaN
2018report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorNaNnaics_codecombined_heat_powerreporting_frequencyNaN
2019report_yearplant_id_eiaplant_name_eiaplant_stateeia_sectorNaNnaics_codecombined_heat_powerreporting_frequencyNaN
\n", + "
" + ], + "text/plain": [ + " report_year plant_id_eia plant_name_eia plant_state eia_sector \\\n", + "2011 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", + "2012 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", + "2013 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", + "2014 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", + "2015 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", + "2016 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", + "2017 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", + "2018 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", + "2019 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", + "\n", + " sector_name naics_code combined_heat_power reporting_frequency \\\n", + "2011 sector_name naics_code combined_heat_power reporting_frequency \n", + "2012 NaN naics_code combined_heat_power reporting_frequency \n", + "2013 sector_name naics_code combined_heat_power reporting_frequency \n", + "2014 NaN naics_code combined_heat_power reporting_frequency \n", + "2015 NaN naics_code combined_heat_power reporting_frequency \n", + "2016 NaN naics_code combined_heat_power reporting_frequency \n", + "2017 NaN naics_code combined_heat_power reporting_frequency \n", + "2018 NaN naics_code combined_heat_power reporting_frequency \n", + "2019 NaN naics_code combined_heat_power reporting_frequency \n", + "\n", + " nameplate_capacity_mw \n", + "2011 nameplate_capacity_mw \n", + "2012 NaN \n", + "2013 NaN \n", + "2014 NaN \n", + "2015 NaN \n", + "2016 NaN \n", + "2017 NaN \n", + "2018 NaN \n", + "2019 NaN " + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "col_dfs['plant_frame']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Filter for columns with any NaNs to avoid filling the screen with unchanged columns" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sector_namenameplate_capacity_mw
2011sector_namenameplate_capacity_mw
2012NaNNaN
2013sector_nameNaN
2014NaNNaN
2015NaNNaN
2016NaNNaN
2017NaNNaN
2018NaNNaN
2019NaNNaN
\n", + "
" + ], + "text/plain": [ + " sector_name nameplate_capacity_mw\n", + "2011 sector_name nameplate_capacity_mw\n", + "2012 NaN NaN\n", + "2013 sector_name NaN\n", + "2014 NaN NaN\n", + "2015 NaN NaN\n", + "2016 NaN NaN\n", + "2017 NaN NaN\n", + "2018 NaN NaN\n", + "2019 NaN NaN" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "col_dfs['plant_frame'].loc[:,col_dfs['plant_frame'].isna().any()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Look at all the dataframes this way" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "a = iter(col_dfs.items())" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "boiler_fuel\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: [2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k,v = next(a)\n", + "print(k)\n", + "v.loc[:,v.isna().any()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "fuel_receipts_costs has a weird column dropout: mercury content in 2013. But I checked the raw excel sheet and it is real." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fuel_receipts_costs\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mercury_content_ppmnatural_gas_delivery_contract_type_codemoisture_content_pctchlorine_content_ppm
2008NaNNaNNaNNaN
2009NaNNaNNaNNaN
2010NaNNaNNaNNaN
2011NaNNaNNaNNaN
2012mercury_content_ppmNaNNaNNaN
2013NaNNaNNaNNaN
2014mercury_content_ppmNaNNaNNaN
2015mercury_content_ppmNaNNaNNaN
2016mercury_content_ppmnatural_gas_delivery_contract_type_codemoisture_content_pctchlorine_content_ppm
2017mercury_content_ppmnatural_gas_delivery_contract_type_codemoisture_content_pctchlorine_content_ppm
2018mercury_content_ppmnatural_gas_delivery_contract_type_codemoisture_content_pctchlorine_content_ppm
2019mercury_content_ppmnatural_gas_delivery_contract_type_codemoisture_content_pctchlorine_content_ppm
\n", + "
" + ], + "text/plain": [ + " mercury_content_ppm natural_gas_delivery_contract_type_code \\\n", + "2008 NaN NaN \n", + "2009 NaN NaN \n", + "2010 NaN NaN \n", + "2011 NaN NaN \n", + "2012 mercury_content_ppm NaN \n", + "2013 NaN NaN \n", + "2014 mercury_content_ppm NaN \n", + "2015 mercury_content_ppm NaN \n", + "2016 mercury_content_ppm natural_gas_delivery_contract_type_code \n", + "2017 mercury_content_ppm natural_gas_delivery_contract_type_code \n", + "2018 mercury_content_ppm natural_gas_delivery_contract_type_code \n", + "2019 mercury_content_ppm natural_gas_delivery_contract_type_code \n", + "\n", + " moisture_content_pct chlorine_content_ppm \n", + "2008 NaN NaN \n", + "2009 NaN NaN \n", + "2010 NaN NaN \n", + "2011 NaN NaN \n", + "2012 NaN NaN \n", + "2013 NaN NaN \n", + "2014 NaN NaN \n", + "2015 NaN NaN \n", + "2016 moisture_content_pct chlorine_content_ppm \n", + "2017 moisture_content_pct chlorine_content_ppm \n", + "2018 moisture_content_pct chlorine_content_ppm \n", + "2019 moisture_content_pct chlorine_content_ppm " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k,v = next(a)\n", + "print(k)\n", + "v.loc[:,v.isna().any()]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "generation_fuel\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: [2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k,v = next(a)\n", + "print(k)\n", + "v.loc[:,v.isna().any()]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "generator\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: [2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k,v = next(a)\n", + "print(k)\n", + "v.loc[:,v.isna().any()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "plant_frame has a few ephemeral columns. Again, checking the raw EIA excel sheets shows that they really do only exist for 2011 and 2013" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "plant_frame\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sector_namenameplate_capacity_mw
2011sector_namenameplate_capacity_mw
2012NaNNaN
2013sector_nameNaN
2014NaNNaN
2015NaNNaN
2016NaNNaN
2017NaNNaN
2018NaNNaN
2019NaNNaN
\n", + "
" + ], + "text/plain": [ + " sector_name nameplate_capacity_mw\n", + "2011 sector_name nameplate_capacity_mw\n", + "2012 NaN NaN\n", + "2013 sector_name NaN\n", + "2014 NaN NaN\n", + "2015 NaN NaN\n", + "2016 NaN NaN\n", + "2017 NaN NaN\n", + "2018 NaN NaN\n", + "2019 NaN NaN" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k,v = next(a)\n", + "print(k)\n", + "v.loc[:,v.isna().any()]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "stocks\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: [2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k,v = next(a)\n", + "print(k)\n", + "v.loc[:,v.isna().any()]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/work-in-progress/explore_fuel_allocation.ipynb b/notebooks/work-in-progress/explore_fuel_allocation.ipynb new file mode 100644 index 0000000000..cc94fa4664 --- /dev/null +++ b/notebooks/work-in-progress/explore_fuel_allocation.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "innocent-temperature", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cathedral-general", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "narrow-bloom", + "metadata": {}, + "outputs": [], + "source": [ + "IDX_PM_FUEL = ['prime_mover_code', 'fuel_type'] # plus 'plant_id_eia', 'report_date' of course\n", + "# inputs\n", + "gens = pd.DataFrame({\n", + " 'generator_id': ['A', 'B', 'C', 'D'],\n", + " 'unit_id_pudl': [1, 1, 2, 3],\n", + " 'prime_mover_code': ['GT', 'ST', 'GT', 'GT'],\n", + " 'Energy_source_code_1': ['NG', 'NG', 'NG', 'NG'],\n", + " 'Energy_source_code_2': ['DFO', np.nan, 'DFO', np.nan],\n", 
+ " 'capacity_mw': [100, 50, 25, 10]\n", + "})\n", + "bf = pd.DataFrame({\n", + " 'boiler_id': ['X1', 'Y1'],\n", + " 'fuel_type': ['NG', 'DFO'],\n", + " 'unit_id_pudl': [1, 1],\n", + " 'fuel_consumed_mmbtu': [8, 2]\n", + "})\n", + "gf = pd.DataFrame({\n", + " 'prime_mover_code': \n", + " ['GT', 'ST', 'GT'],\n", + " 'fuel_type': \n", + " ['NG', 'NG', 'DFO'],\n", + " 'fuel_consumed_mmbtu': \n", + " [7, 2, 3]\n", + "})\n", + "\n", + "## Interim Output\n", + "# aggregate the bf table to the unit leve\n", + "bf_unit = pd.DataFrame({\n", + " 'unit_id_pudl': [1],\n", + " 'fuel_consumed_mmbtu': [10]\n", + "})\n", + "# stack the gens table with the different fuel types\n", + "gens_stacked = pd.DataFrame({\n", + " 'generator_id': ['A', 'A', 'B', 'C', 'C', 'D'],\n", + " 'unit_id_pudl': [1, 1, 1, 2, 2, 3],\n", + " 'prime_mover_code': ['GT', 'GT', 'ST', 'GT', 'GT', 'GT'],\n", + " 'fuel_type': ['NG', 'DFO', 'NG', 'NG', 'DFO', 'NG']\n", + "})\n", + "# associate the generators w/ the gf table (mergeon IDX_PM_F)\n", + "# and calculate values (sums/groupbys)\n", + "gen_assoc = pd.DataFrame({\n", + " 'generator_id': ['A', 'A', 'B', 'C', 'C', 'D'],\n", + " 'unit_id_pudl': [1, 1, 1, 2, 2, 3,],\n", + " 'prime_mover_code': \n", + " ['GT', 'GT', 'ST', 'GT', 'GT', 'GT'],\n", + " 'fuel_type': \n", + " ['NG', 'DFO', 'NG', 'NG', 'DFO', 'NG'],\n", + " 'capacity_mw': \n", + " [100, 100, 50, 25, 25, 10],\n", + " # what we are trying to allocate\n", + " # merge btwn gen_assoc & gf, by PM_F\n", + " # Fuel totals on a per-pm-fuel basis\n", + " 'fuel_consumed_mmbtu_gf': \n", + " [7, 3, 2, 7, 3, 7],\n", + "\n", + " ########\n", + " # sums/groupbys that we need before doing any of the assign-type calcs\n", + " 'capacity_mw_pm_fuel': \n", + " [135, 125, 50, 135, 125, 10],\n", + " # (summed by PM)\n", + " 'fuel_consumed_mmbtu_gf_pm':\n", + " [10, 10, 2, 10, 10, 10],\n", + " # from bf table, summed by fuel\n", + " 'fuel_consumed_mmbtu_bf_fuel':\n", + " [8, 2, 8, 8, 2, 8],\n", + " # from gf table, summed 
by fuel\n", + " 'fuel_consumed_mmbtu_gf_fuel': \n", + " [9, 3, 9, 9, 3, 9],\n", + " # reported by unit and fuel type in BF\n", + " # merged into association table, broadcasting across prime mover\n", + " 'fuel_consumed_mmbtu_bf':\n", + " [8, 2, 8, np.nan, np.nan, np.nan],\n", + " 'exist_in_bf':\n", + " [True, True, True, False, False, False],\n", + " # sum of cap in [pm, fuel + exist_in_bf] group\n", + " 'capacity_mw_pm_fuel_exist_in_bf':\n", + " [100, 100, 50, 35, 35, 35],\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "pleased-following", + "metadata": { + "jupyter": { + "source_hidden": true + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# gen_assoc tabl but only the records that don't exist in bf\n", + "# plus the calculated fields we need to generate the final fuel\n", + "frac_not_in_bf = pd.DataFrame({\n", + " ### columns from gen_assoc\n", + " 'generator_id': ['C', 'C', 'D'],\n", + " 'unit_id_pudl': [2, 2, 3,],\n", + " 'prime_mover_code': ['GT', 'GT', 'GT'],\n", + " 'fuel_type': ['NG', 'DFO', 'NG'],\n", + " 'capacity_mw': [25, 25, 10],\n", + " # what we are trying to allocate\n", + " # merge btwn gen_assoc & gf, by PM_F\n", + " # Fuel totals on a per-pm-fuel basis\n", + " 'fuel_consumed_mmbtu_gf': \n", + " [7, 3, 7],\n", + " \n", + " ########\n", + " # sums/groupbys that we need before doing any of the assign-type calcs\n", + " # from bf table, summed by fuel\n", + " 'fuel_consumed_mmbtu_bf_fuel':\n", + " [8, 2, 8],\n", + " # from gf table, summed by fuel\n", + " 'fuel_consumed_mmbtu_gf_fuel': \n", + " [9, 3, 9],\n", + " # sum of cap in [pm, fuel + exist_in_bf] group\n", + " # total capacity in each pm fuel group\n", + " 'capacity_mw_pm_fuel_exist_in_bf':\n", + " [35, 25, 35],\n", + " \n", + " # what fuel should be assigned to these \"not in bf\" \n", + " # records based on fuel groupings?\n", + " # fuel that should be allocated to the not-reporting-to-bf records\n", + " # (the fuel in the gf tbl's fuel group - 
the fuel in the bf tbl's fuel group)\n", + " # (fuel_consumed_mmbtu_gf_fuel - fuel_consumed_mmbtu_bf_fuel) / fuel_consumed_mmbtu_gf\n", + " 'fuel_consumed_mmbtu_not_in_bf': \n", + " [(9-8), (3-2), (9-8)],\n", + " \n", + " ### Get the frac_cap (used for allocating within gens that don't report to bf)\n", + " # Portion of capacity for each PM_FUEL group\n", + " # capacity_mw / capacity_mw_pm_fuel_exist_in_bf\n", + " 'frac_cap':\n", + " [25/35, 25/25, 10/35],\n", + " \n", + " # frac * fuel_consumed_mmbtu_gf\n", + " 'fuel_consumed_mmbtu':\n", + " [(9-8) * (25/35), (3-2) * (35/35), (9-8) * (10/35)],\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "legislative-maintenance", + "metadata": {}, + "outputs": [], + "source": [ + "frac_df_in_bf = pd.DataFrame({\n", + " 'generator_id': ['A', 'A', 'B'],\n", + " 'unit_id_pudl': [1, 1, 1],\n", + " 'prime_mover_code': ['GT', 'GT', 'ST'],\n", + " 'fuel_type': ['NG', 'DFO', 'NG'],\n", + " 'capacity_mw': \n", + " [100, 100, 50],\n", + " # merge btwn gen_assoc & gf, by PM_F\n", + " # Fuel totals on a pm-fuel basis\n", + " 'fuel_consumed_mmbtu_gf':\n", + " [7, 3, 2],\n", + " \n", + " ########\n", + " # sums/groupbys that we need before doing any of the assign-type calcs\n", + " 'capacity_mw_pm_fuel': \n", + " [135, 125, 50],\n", + " # reported by unit and fuel type in BF\n", + " # merged into association table, broadcasting across prime mover\n", + " 'fuel_consumed_mmbtu_bf':\n", + " [8, 2, 8],\n", + " # from gf table, summed by fuel\n", + " 'fuel_consumed_mmbtu_gf_fuel': \n", + " [9, 3, 9],\n", + " \n", + " 'frac_cap_in_pm_fuel':\n", + " [100/135, 100/125, 50/50],\n", + " \n", + " # portion of the fuel that should be allocated to the in-bf records\n", + " # this is on a fuel basis bc the bf table is reported w/ only fuel (no PM)\n", + " # fuel_consumed_mmbtu_bf/fuel_consumed_mmbtu_gf_fuel\n", + " 'frac_in_bf':\n", + " [(8/9), (2/3), (8/9)],\n", + " # we are...\n", + " # fuel_consumed_mmbtu_gf * 
frac_in_bf\n", + " 'fuel_consumed_mmbtu':\n", + " [7*(8/9), 3*(2/3), 2*(8/9)],\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "prospective-budget", + "metadata": {}, + "outputs": [], + "source": [ + "frac_df_in_bf.assign(\n", + " test=lambda x: x.frac_cap_in_pm_fuel * x.frac_in_bf,\n", + " test_fc=lambda x: x.fuel_consumed_mmbtu_gf * x.test\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "defined-impossible", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/work-in-progress/explore_net_generation.ipynb b/notebooks/work-in-progress/explore_net_generation.ipynb index c30b70fe2b..e33d105263 100644 --- a/notebooks/work-in-progress/explore_net_generation.ipynb +++ b/notebooks/work-in-progress/explore_net_generation.ipynb @@ -35,7 +35,8 @@ "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", "%matplotlib inline\n", - "mpl.style.use('dark_background')" + "mpl.style.use('dark_background')\n", + "plt.rcParams[\"figure.figsize\"] = (15,10)" ] }, { @@ -80,13 +81,38 @@ "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine,freq='AS', end_date=\"2019-12-31\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "gen_allocated = pudl_out.gen_allocated_eia923(update=True)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "gen_allocated = pudl_out.gen_allocated(update=True)" + "plt.plot(pudl_out.gf_eia923()\n", + " 
.groupby(by='report_date',).sum().fuel_consumed_mmbtu,\n", + " label='Fuel Table', linewidth=6, color='turquoise'\n", + " )\n", + "plt.plot(pudl_out.gen_allocated_eia923()\n", + " .groupby(by='report_date',).sum().fuel_consumed_mmbtu,\n", + " label='Reassigned',linewidth=6, color='deeppink'\n", + " )\n", + "\n", + "plt.legend()\n", + "plt.ylabel(\"Total Fuel Consumed (mmBTU)\")\n", + "plt.xlabel(\"year\")\n", + "plt.title(\"Reassigned fuel consumed v fuel table\", size=14)\n", + "plt.show()" ] }, { @@ -97,11 +123,11 @@ "source": [ "plt.plot(pudl_out.gf_eia923()\n", " .groupby(by='report_date',).sum().fuel_consumed_mmbtu,\n", - " label='Fuel Table'\n", + " label='Fuel Table', linewidth=6, color='turquoise'\n", " )\n", - "plt.plot(pudl_out.gen_allocated()\n", + "plt.plot(pudl_out.gen_allocated_eia923()\n", " .groupby(by='report_date',).sum().fuel_consumed_mmbtu,\n", - " label='Reassigned',\n", + " label='Reassigned',linewidth=6, color='deeppink'\n", " )\n", "\n", "plt.legend()\n", @@ -119,11 +145,33 @@ "source": [ "plt.plot(pudl_out.gf_eia923()\n", " .groupby(by='report_date',dropna=False).sum().net_generation_mwh,\n", - " label='Fuel Table Net Gen'\n", + " label='Fuel Table Net Gen', linewidth=6, color='turquoise'\n", + " )\n", + "plt.plot(pudl_out.gen_allocated_eia923()\n", + " .groupby(by='report_date',dropna=False).sum().net_generation_mwh,\n", + " label='Reassigned Net Gen', linewidth=6, color='deeppink'\n", + " )\n", + "\n", + "plt.legend()\n", + "plt.ylabel(\"Total Net Gen (MWh)\")\n", + "plt.xlabel(\"year\")\n", + "plt.title(\"Reassigned Net gen v fuel table\", size=14)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(pudl_out.gf_eia923()\n", + " .groupby(by='report_date',dropna=False).sum().net_generation_mwh,\n", + " label='Fuel Table Net Gen', linewidth=6, color='turquoise'\n", " )\n", - "plt.plot(pudl_out.gen_allocated()\n", + 
"plt.plot(pudl_out.gen_allocated_eia923()\n", " .groupby(by='report_date',dropna=False).sum().net_generation_mwh,\n", - " label='Reassigned Net Gen',\n", + " label='Reassigned Net Gen', linewidth=6, color='deeppink'\n", " )\n", "\n", "plt.legend()\n", @@ -213,13 +261,13 @@ "source": [ "plt.plot((net_gen_diff.groupby(by='report_date',dropna=False).sum().net_gen_diff_num\n", " /net_gen_diff.groupby(by='report_date',dropna=False).sum().net_generation_mwh_gf),\n", - " label='evvvveerrything'\n", + " label='all generators', linewidth=6, color='turquoise'\n", " )\n", "plt.plot((net_gen_diff[(net_gen_diff.net_generation_mwh_gen.notnull())]\n", " .groupby(by='report_date',dropna=False).sum().net_gen_diff_num\n", " /net_gen_diff.groupby(by='report_date',dropna=False).sum().net_generation_mwh_gf\n", " ),\n", - " label='only co-reported*'\n", + " label='only co-reported*', linewidth=6, color='deeppink'\n", " )\n", "\n", "plt.legend()\n", @@ -239,10 +287,10 @@ " df = net_gen_diff[diff_mask & (net_gen_diff.report_date.dt.year == year)]\n", " plt.hist(\n", " df.net_gen_diff_rate,\n", - " bins=75,\n", - " range=(-.5,1.5),\n", - " label=year, #stacked=True,\n", - " #weights=abs(df.net_generation_mwh_gf)\n", + " bins=150,\n", + " range=(-.25,1.5),\n", + " label=year,\n", + " weights=abs(df.net_generation_mwh_gf)\n", " )\n", "plt.title(\"Pre-2015 ratio of net gen from gen table vs gen fuel table\", size=16)\n", "plt.legend()\n", @@ -261,9 +309,9 @@ " df = net_gen_diff[diff_mask & (net_gen_diff.report_date.dt.year == year)]\n", " plt.hist(\n", " df.net_gen_diff_rate,\n", - " bins=75,\n", - " range=(-.5,1.5),\n", - " label=year, #stacked=True,\n", + " bins=150,\n", + " range=(-.25,1.5),\n", + " label=year,\n", " weights=abs(df.net_generation_mwh_gf)\n", " )\n", "plt.title(\"Post-2014 ratio of net gen from gen table vs gen fuel table\", size=16)\n", @@ -273,6 +321,28 @@ "plt.show()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "for year in range(2009,2019):\n", + " df = net_gen_diff[diff_mask & (net_gen_diff.report_date.dt.year == year)]\n", + " plt.hist(\n", + " df.net_gen_diff_rate,\n", + " bins=200,\n", + " range=(1.001,2),\n", + " label=year,\n", + " weights=abs(df.net_generation_mwh_gf)\n", + " )\n", + "plt.title(\"Bad Ratios of net gen from gen table vs gen fuel table\", size=16)\n", + "plt.legend()\n", + "plt.xlabel(\"\"\"ratio\n", + "note: All records that had nearly identical net generation has been removed\"\"\")\n", + "plt.show()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -341,7 +411,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.6" + "version": "3.9.2" } }, "nbformat": 4, diff --git a/setup.py b/setup.py index 5f2a5c9341..b644376cec 100644 --- a/setup.py +++ b/setup.py @@ -20,9 +20,9 @@ "matplotlib~=3.0", "networkx~=2.2", "numpy~=1.20", - "pandas==1.2.5", + "pandas~=1.2.5", "prefect[viz, gcp]~=0.14.2", - "pyarrow~=3.0", + "pyarrow~=4.0", "pygeos~=0.9.0", "pyyaml~=5.0", "scikit-learn~=0.24.1", @@ -48,7 +48,6 @@ "isort~=5.0", "jedi~=0.18", "lxml~=4.6", - "pdbpp~=0.10", "tox~=3.20", "twine~=3.3", ] @@ -57,6 +56,7 @@ "doc8~=0.8.0", "sphinx~=3.0", "sphinx-issues~=1.2", + "sphinx-reredirects", "sphinx_rtd_theme~=0.5.0", ] diff --git a/src/pudl/__init__.py b/src/pudl/__init__.py index 0b88081c02..3c22ac09f7 100644 --- a/src/pudl/__init__.py +++ b/src/pudl/__init__.py @@ -41,7 +41,6 @@ import pudl.output.epacems import pudl.output.ferc1 import pudl.output.ferc714 -import pudl.output.glue import pudl.output.pudltabl # Transformation functions, organized by data source: import pudl.transform.eia diff --git a/src/pudl/analysis/allocate_net_gen.py b/src/pudl/analysis/allocate_net_gen.py index 43d2df89a4..fff9764156 100644 --- a/src/pudl/analysis/allocate_net_gen.py +++ b/src/pudl/analysis/allocate_net_gen.py @@ -1,66 +1,90 @@ """ -Allocated data from generation_fuel_eia923 table to generator level. 
- -Net generation and fuel consumption is reported in two seperate tables in EIA -923: in the generation_eia923 and generation_fuel_eia923 tables. While the -generation_fuel_eia923 table is more complete (the generation_eia923 table -includes only ~55% of the reported MWhs), the generation_eia923 table is more -granular (it is reported at the generator level). - -This module allocates net generation and fuel consumption from the -generation_fuel_eia923 table to the generator level. The main function here is -``allocate_gen_fuel_by_gen()``. - -The methodology we are employing here to allocate the net generation from the -generation_fuel_eia923 table is not the only option and includes many -assumptions. Firstly, this methodology assumes the generation_fuel_eia923 -table is the ground truth for net generation - as opposed to the -generation_eia923 table. We are making this assumption because we know that the -generation_fuel_eia923 table is necessarily more complete - there are many -full plants or generators in plants that do not report to the generation_eia923 -table at all. - -The next important note is the way in which we associated the data reported in -the generation_fuel_eia923 table with generators. The generation_fuel_eia923 -table is reported at the level of prime_mover_code/fuel_type (See -``IDX_PM_FUEL``). Generators have prime_mover_codes, fuel_types (in -energy_source_code_*s) and report_dates. This methology does not distinguish -between primary and secondary fuel_types for generators - it associates -portions of net generatoion to each prime_mover_code/fuel_type. - -The last high-level point about this methodology surrounds the allocation -method. In order to allocate portions of the net generation, we calculate as -allocation ratio, which is based on the net generation from the -generation_eia923 table when available and the capacity_mw from the -generators_eia860 table. 
Some plants have a portion of their generators that -report to generation_eia923. For those plants, we assign an allocation ratio in -three steps: first we generate an allocation ratio based on capacity_mw for each -group of generators (generators the do report in generation_eia923 and those -that do not). Then we generate an allocation ratio based on the net -generation reported in generation_eia923. Then we multiply both allocation -ratios together to scale down the net generation based ratio based on the -capacity of the generators reporting in generation_eia923. - -This methodology has several potentail flaws and drawbacks. Because there is no -indicator of what portion of the energy_source_codes (ie. fule_type), we -associate the net generation equally amoung them. In effect, if a plant had +Allocate data from generation_fuel_eia923 table to generator level. + +Net electricity generation and fuel consumption are reported in multiple ways +in the EIA 923. The generation_fuel_eia923 table reports both generation and +fuel consumption, and breaks them down by plant, prime mover, and fuel. In +parallel, the generation_eia923 table reports generation by generator, and the +boiler_fuel_eia923 table reports fuel consumption by boiler. + +The generation_fuel_eia923 table is more complete, but the generation_eia923 + +boiler_fuel_eia923 tables are more granular. The generation_eia923 table +includes only ~55% of the total MWhs reported in the generation_fuel_eia923 +table. + +This module estimates the net electricity generation and fuel consumption +attributable to individual generators based on the more expansive reporting of +the data in the generation_fuel_eia923 table. The main coordinating function +here is :func:`pudl.analysis.allocate_net_gen.allocate_gen_fuel_by_gen`. 
+ +The algorithm we're using assumes: + +* The generation_eia923 table is the authoritative source of information about + how much generation is attributable to an individual generator, if it reports + in that table. +* The generation_fuel_eia923 table is the authoritative source of information + about how much generation and fuel consumption is attributable to an entire + plant. +* The generators_eia860 table provides an exhaustive list of all generators + whose generation is being reported in the generation_fuel_eia923 table. + +We allocate the net generation reported in the generation_fuel_eia923 table on +the basis of plant, prime mover, and fuel type among the generators in each +plant that have matching fuel types. Generation is allocated proportional to +reported generation if it's available, and proportional to each generator's +capacity if generation is not available. + +In more detail: within each year of data, we split the plants into three groups: + +* Plants where ALL generators report in the more granular generation_eia923 + table. +* Plants where NONE of the generators report in the generation_eia923 table. +* Plants where only SOME of the generators report in the generation_eia923 + table. + +In plant-years where ALL generators report more granular generation, the total +net generation reported in the generation_fuel_eia923 table is allocated in +proportion to the generation each generator reported in the generation_eia923 +table. We do this instead of using net_generation_mwh from generation_eia923 +because there are some small discrepancies between the total amounts of +generation reported in these two tables. + +In plant-years where NONE of the generators report more granular generation, +we create a generator record for each associated fuel type. Those records are +merged with the generation_fuel_eia923 table on plant, prime mover code, and +fuel type. 
Each group of plant, prime mover, and fuel will have some amount of +reported net generation associated with it, and one or more generators. The +net generation is allocated among the generators within the group in proportion +to their capacity. Then the allocated net generation is summed up by generator. + +In the hybrid case, where only SOME of a plant's generators report the more +granular generation data, we use a combination of the two allocation methods +described above. First, the total generation reported across a plant in the +generation_fuel_eia923 table is allocated between the two categories of +generators (those that report fine-grained generation, and those that don't) +in direct proportion to the fraction of the plant's generation which is reported +in the generation_eia923 table, relative to the total generation reported in the +generation_fuel_eia923 table. + +Note that this methodology does not distinguish between primary and secondary +fuel_types for generators. It associates portions of net generation to each +prime_mover_code/fuel_type. If generators in the same plant do not report +detailed generation, have the same prime_mover_code, and use the same fuels, +but have very different capacity factors in reality, this methodology will +allocate generation such that they end up with very similar capacity factors. +We imagine this is an uncommon scenario. + +This methodology has several potential flaws and drawbacks. Because there is no +indicator of what portion of the energy_source_codes (ie. fuel_type), we +associate the net generation equally among them. In effect, if a plant had multiple generators with the same prime_mover_code but opposite primary and -secondary fuels (eg. gen 1 has a primary fuel of 'NG' and secondard fuel of +secondary fuels (eg. 
gen 1 has a primary fuel of 'NG' and secondary fuel of 'DFO', while gen 2 has a primary fuel of 'DFO' and a secondary fuel of 'NG'), the methodology associates the generation_fuel_eia923 records similarly across -these two generators. Nonetheless, the allocated net generation will still be -porporational to each generators generation_eia923 net generation or capacity. - -This methodology also has an effect of smoothing differences of generators with -the same prime_mover_code and fuel_type. In effect, two similar generators will -appear to have similar capacity factors, especially if they reported no data to -the generation_eia923 table. - -Another methodology that could be worth employing is use the generation_eia923 -table when available and allocate the remaining net generation in a similar -methodology as we have currently employed by using each generators' capacity as -an allocator. For the ~.2% of records which report more net generation in the -generation_eia923 table, we would have to augment that methodology. +these two generators. However, the allocated net generation will still be +proportional to each generator's net generation (if it's reported) or capacity +(if generation is not reported). + """ import logging @@ -70,6 +94,8 @@ import numpy as np import pandas as pd +import pudl.helpers + logger = logging.getLogger(__name__) IDX_GENS = ['plant_id_eia', 'generator_id', 'report_date'] @@ -104,65 +130,100 @@ def allocate_gen_fuel_by_gen(pudl_out): The ``DATA_COLS`` will be scaled to the level of the ``IDX_GENS``. """ - gen_pm_fuel = allocate_gen_fuel_by_gen_pm_fuel(pudl_out) - gen = agg_by_generator(gen_pm_fuel, pudl_out) - _test_gen_fuel_allocation(pudl_out, gen) - return gen - - -def allocate_gen_fuel_by_gen_pm_fuel(pudl_out): + # extract all of the tables from pudl_out early in the process and select + # only the columns we need. this is for speed and clarity. 
+ gf = pudl_out.gf_eia923().loc[ + :, IDX_PM_FUEL + ['net_generation_mwh', 'fuel_consumed_mmbtu']] + gen = pudl_out.gen_original_eia923().loc[ + :, IDX_GENS + ['net_generation_mwh']] + gens = pudl_out.gens_eia860().loc[ + :, IDX_GENS + ['prime_mover_code', 'capacity_mw', 'fuel_type_count', + 'operational_status', 'retirement_date'] + + list(pudl_out.gens_eia860().filter(like='energy_source_code'))] + + # do the allocation! (this function coordinates the bulk of the work in + # this module) + gen_pm_fuel = allocate_gen_fuel_by_gen_pm_fuel(gf, gen, gens) + # aggregate the gen/pm/fuel records back to generator records + gen_allocated = agg_by_generator(gen_pm_fuel) + _test_gen_fuel_allocation(gen, gen_allocated) + return gen_allocated + + +def allocate_gen_fuel_by_gen_pm_fuel(gf, gen, gens, drop_interim_cols=True): """ Proportionally allocate net gen from gen_fuel table to generators. Two main steps here: - * associated gen_fuel data w/ generators - * allocate gen_fuel data proportionally - - The assocation process happens via `associate_gen_tables()`. - - The allocation process entails generating a ratio for each record within a - ``IDX_PM_FUEL`` group. We have two options for generating this ratio: the - net generation in the generation_eia923 table and the capacity from the - generators_eia860 table. We calculate both these ratios, then used the - net generation based ratio if available to allocation a portion of the - associated data fields. - - Args: - pudl_out (pudl.output.pudltabl.PudlTabl): An object used to create - the tables for EIA and FERC Form 1 analysis. + * associate `generation_fuel_eia923` table data w/ generators + * allocate `generation_fuel_eia923` table data proportionally + + The association process happens via `associate_generator_tables()`. + + The allocation process (via `calc_allocation_fraction()`) entails + generating a fraction for each record within a ``IDX_PM_FUEL`` group. 
We + have two data points for generating this ratio: the net generation in the + generation_eia923 table and the capacity from the generators_eia860 table. + The end result is a `frac` column which is unique for each + generator/prime_mover/fuel record and is used to allocate the associated + net generation from the `generation_fuel_eia923` table. + + Args: + gf (pandas.DataFrame): generation_fuel_eia923 table with columns: + ``IDX_PM_FUEL`` and `net_generation_mwh` and `fuel_consumed_mmbtu`. + gen (pandas.DataFrame): generation_eia923 table with columns: + ``IDX_GENS`` and `net_generation_mwh`. + gens (pandas.DataFrame): generators_eia860 table with cols: + ``IDX_GENS``, `capacity_mw`, `prime_mover_code`, + and all of the `energy_source_code` columns + drop_interim_cols (boolean): True/False flag for dropping interim + columns which are used to generate the `net_generation_mwh` column + (they are mostly the `frac` column and net generation reported in + the original generation_eia923 and generation_fuel_eia923 tables) + that are useful for debugging. Default is True, which will drop + the columns. Returns: pandas.DataFrame """ - gens_asst = (associate_gen_tables(pudl_out) - .pipe(_associate_unconnected_records) - .pipe(_associate_fuel_type_only, pudl_out)) + gen_assoc = associate_generator_tables(gf=gf, gen=gen, gens=gens) - gen_pm_fuel = make_allocation_ratio(gens_asst).pipe(_test_gen_ratio) + # Generate a fraction to use to allocate net generation by. + # These two methods create a column called `frac`, which will be a fraction + # to allocate net generation from the gf table for each `IDX_PM_FUEL` group + gen_pm_fuel = prep_alloction_fraction(gen_assoc) + gen_pm_fuel_frac = calc_allocation_fraction(gen_pm_fuel) # do the allocating-ing! 
- gen_pm_fuel = ( - gen_pm_fuel.assign( - # we could x.net_generation_mwh_gen.fillna here if we wanted to + gen_pm_fuel_frac = ( + gen_pm_fuel_frac.assign( + # we could x.net_generation_mwh_g_tbl.fillna here if we wanted to # take the net gen - net_generation_mwh=lambda x: x.net_generation_mwh_gf * x.gen_ratio, + net_generation_mwh=lambda x: x.net_generation_mwh_gf_tbl * x.frac, # let's preserve the gf version of fuel consumption (it didn't show - # up in the tables we pulled together in associate_gen_tables()). - fuel_consumed_mmbtu_gf=lambda x: x.fuel_consumed_mmbtu, - fuel_consumed_mmbtu=lambda x: x.fuel_consumed_mmbtu * x.gen_ratio + # up in the tables we pulled together in associate_generator_tables()). + # TODO: THIS IS A HACK! We need to generate a proper fraction for + # allocating fuel consumption based on the boiler_fuel_eia923 tbl + fuel_consumed_mmbtu_gf_tbl=lambda x: x.fuel_consumed_mmbtu, + fuel_consumed_mmbtu=lambda x: x.fuel_consumed_mmbtu * x.frac ) + .astype(pudl.helpers.get_pudl_dtypes({ + "plant_id_eia": "eia", + "net_generation_mwh": "eia", + })) + .dropna(how='all') + .pipe(_test_gen_pm_fuel_output, gf=gf, gen=gen) + .pipe(pudl.helpers.convert_cols_dtypes, 'eia') ) + if drop_interim_cols: + gen_pm_fuel_frac = gen_pm_fuel_frac[ + IDX_PM_FUEL + + ['generator_id', 'energy_source_code_num', 'net_generation_mwh', + 'fuel_consumed_mmbtu']] + return gen_pm_fuel_frac - gen_pm_fuel = ( - gen_pm_fuel.astype( - {"plant_id_eia": "Int64", - "net_generation_mwh": "float"}) - .pipe(_test_gen_pm_fuel_output, pudl_out) - ) - return gen_pm_fuel - -def agg_by_generator(gen_pm_fuel, pudl_out): +def agg_by_generator(gen_pm_fuel): """ Aggreate the allocated gen fuel data to the generator level. 
@@ -177,17 +238,15 @@ def agg_by_generator(gen_pm_fuel, pudl_out): return gen -def _stack_generators(pudl_out, idx_stack, cols_to_stack, - cat_col='energy_source_code_num', - stacked_col='fuel_type'): +def stack_generators(gens, + cat_col='energy_source_code_num', + stacked_col='fuel_type'): """ Stack the generator table with a set of columns. Args: - pudl_out (pudl.output.pudltabl.PudlTabl): An object used to create - the tables for EIA and FERC Form 1 analysis. - idx_stack (iterable): list of columns. index to stack based on - cols_to_stack (iterable): list of columns to stack + gens (pandas.DataFrame): generators_eia860 table with cols: ``IDX_GENS`` + and all of the `energy_source_code` columns cat_col (string): name of category column which will end up having the column names of cols_to_stack stacked_col (string): name of column which will end up with the stacked @@ -196,89 +255,142 @@ def _stack_generators(pudl_out, idx_stack, cols_to_stack, Returns: pandas.DataFrame: a dataframe with these columns: idx_stack, cat_col, stacked_col + """ - gens = pudl_out.gens_eia860() + esc = list(gens.filter(like='energy_source_code')) gens_stack_prep = ( - pd.DataFrame(gens.set_index(idx_stack)[cols_to_stack].stack(level=0)) + pd.DataFrame(gens.set_index(IDX_GENS)[esc].stack(level=0)) .reset_index() .rename(columns={'level_3': cat_col, 0: stacked_col}) + .pipe(pudl.helpers.convert_cols_dtypes, 'eia') ) + # merge the stacked df back onto the gens table # we first drop the cols_to_stack so we don't duplicate data gens_stack = pd.merge( - gens.drop(columns=cols_to_stack), + gens.drop(columns=esc), gens_stack_prep, + on=IDX_GENS, how='outer' ) return gens_stack -def associate_gen_tables(pudl_out): +def associate_generator_tables(gf, gen, gens): """ - Assocaite the three tables needed to assign net gen to generators. + Associate the three tables needed to assign net gen to generators. 
Args: - pudl_out (pudl.output.pudltabl.PudlTabl): An object used to create - the tables for EIA and FERC Form 1 analysis. + gf (pandas.DataFrame): generator_fuel_eia923 table with columns: + ``IDX_PM_FUEL`` and `net_generation_mwh` and `fuel_consumed_mmbtu`. + gen (pandas.DataFrame): generation_eia923 table with columns: + ``IDX_GENS`` and `net_generation_mwh`. + gens (pandas.DataFrame): generators_eia860 table with cols: ``IDX_GENS`` + and all of the `energy_source_code` columns + + TODO: Convert these groupby/merges into transforms. """ - esc = [ - 'energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', - 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6' - ] - - stack_gens = _stack_generators( - pudl_out, idx_stack=IDX_GENS, cols_to_stack=esc, - cat_col='energy_source_code_num', stacked_col='fuel_type') + stack_gens = stack_generators( + gens, cat_col='energy_source_code_num', stacked_col='fuel_type') - # because lots of these input dfs include same info columns, this generates - # drop columnss for fuel_cost. This avoids needing to hard code columns. 
- drop_cols_gens = [x for x in stack_gens.columns - if x in pudl_out.gen_original_eia923().columns - and x not in IDX_GENS] - gens_asst = ( + gen_assoc = ( pd.merge( stack_gens, - pudl_out.gen_original_eia923().drop(columns=drop_cols_gens), + gen, on=IDX_GENS, how='outer') + .pipe(remove_retired_generators) .merge( - pudl_out.gf_eia923().groupby(by=IDX_PM_FUEL) - .sum(min_count=1).reset_index(), + gf.groupby(by=IDX_PM_FUEL, as_index=False) + .sum(min_count=1), on=IDX_PM_FUEL, - suffixes=('_gen', '_gf'), + suffixes=('_g_tbl', '_gf_tbl'), how='outer', ) ) - gens_asst = ( + gen_assoc = ( pd.merge( - gens_asst, - gens_asst.groupby(by=IDX_FUEL) - [['capacity_mw', 'net_generation_mwh_gen']].sum(min_count=1) - .add_suffix('_fuel_total') + gen_assoc, + gen_assoc.groupby(by=IDX_FUEL) + [['capacity_mw', 'net_generation_mwh_g_tbl']].sum(min_count=1) + .add_suffix('_fuel') .reset_index(), on=IDX_FUEL, ) + .pipe(pudl.helpers.convert_cols_dtypes, 'eia') + .pipe(_associate_unconnected_records) + .pipe(_associate_fuel_type_only, gf=gf) ) - return gens_asst + return gen_assoc + + +def remove_retired_generators(gen_assoc): + """ + Remove the retired generators. + + We don't want to associate net generation to generators that are retired + (or proposed! or any other `operational_status` besides `existing`). + + We do want to keep the generators that retire mid-year and have generator + specific data from the generation_eia923 table. Removing the generators + that retire mid-report year and don't report to the generation_eia923 table + is not exactly a great assumption. For now, we are removing them. We should + employ a strategy that allocates only a portion of the generation to them + based on their operational months (or by doing the allocation on a monthly + basis). + + Args: + gen_assoc (pandas.DataFrame): table of generators with stacked fuel + types and broadcasted net generation data from the + generation_eia923 and generation_fuel_eia923 tables. 
Output of + `associate_generator_tables()`. + """ + existing = gen_assoc.loc[ + (gen_assoc.operational_status == 'existing') + ] + # keep the gens that retired mid-report-year that have generator + # specific data + retiring = gen_assoc.loc[ + (gen_assoc.operational_status == 'retired') + & (gen_assoc.retirement_date.dt.year == gen_assoc.report_date.dt.year) + & (gen_assoc.net_generation_mwh.notnull()) + ] + + # check how many generators are retiring mid-year that don't have + # gen-specific data. + retiring_removing = gen_assoc.loc[ + (gen_assoc.operational_status == 'retired') + & (gen_assoc.retirement_date.dt.year == gen_assoc.report_date.dt.year) + & (gen_assoc.net_generation_mwh.isnull()) + ] + logger.info( + f'Removing {len(retiring_removing.drop_duplicates(IDX_GENS))} ' + 'generators that retired mid-year out of ' + f'{len(gen_assoc.drop_duplicates(IDX_GENS))}' + ) + + gen_assoc_removed = pd.concat([existing, retiring]) + return gen_assoc_removed def _associate_unconnected_records(eia_generators_merged): """ - Associate unassocaited gen_fuel table records on idx_pm. + Associate unassociated gen_fuel table records on idx_pm. There are a subset of generation_fuel_eia923 records which do not merge onto the stacked generator table on ``IDX_PM_FUEL``. These records generally don't match with the set of prime movers and fuel types in the - stacked generator table. In this method, we associated those straggler, + stacked generator table. In this method, we associate those straggler, unconnected records by merging these records with the stacked generators on the prime mover only. Args: eia_generators_merged (pandas.DataFrame) + """ - # we're assocaiting on the plant/pm level... but we only want to associated - # these unassocaited records w/ the primary fuel type from _stack_generators + # we're associating on the plant/pm level... 
but we only want to associated + # these unassocaited records w/ the primary fuel type from stack_generators # so we're going to merge on energy_source_code_num and idx_pm = ['plant_id_eia', 'prime_mover_code', 'energy_source_code_num', 'report_date', ] @@ -290,31 +402,29 @@ def _associate_unconnected_records(eia_generators_merged): ) eia_generators_unconnected = ( eia_generators_merged[~connected_mask] - .dropna(axis='columns', how='all') .rename(columns={'fuel_type': 'fuel_type_unconnected'}) .assign(energy_source_code_num='energy_source_code_1') .groupby(by=idx_pm).sum(min_count=1) .reset_index() ) - eia_generators = ( pd.merge( eia_generators_connected, eia_generators_unconnected[ - idx_pm + ['net_generation_mwh_gf', 'fuel_consumed_mmbtu']], + idx_pm + ['net_generation_mwh_gf_tbl', 'fuel_consumed_mmbtu']], on=idx_pm, suffixes=('', '_unconnected'), how='left' ) .assign( - # we want the main and the unconnected net get to be added together - # but sometimes there is no main net get and sometimes there is no + # we want the main and the unconnected net gen to be added together + # but sometimes there is no main net gen and sometimes there is no # unconnected net gen - net_generation_mwh_gf=lambda x: np.where( - x.net_generation_mwh_gf.notnull() - | x.net_generation_mwh_gf_unconnected.notnull(), - x.net_generation_mwh_gf.fillna(0) - + x.net_generation_mwh_gf_unconnected.fillna(0), + net_generation_mwh_gf_tbl=lambda x: np.where( + x.net_generation_mwh_gf_tbl.notnull() + | x.net_generation_mwh_gf_tbl_unconnected.notnull(), + x.net_generation_mwh_gf_tbl.fillna(0) + + x.net_generation_mwh_gf_tbl_unconnected.fillna(0), np.nan ), fuel_consumed_mmbtu=lambda x: np.where( @@ -324,14 +434,16 @@ def _associate_unconnected_records(eia_generators_merged): + x.fuel_consumed_mmbtu_unconnected.fillna(0), np.nan ), - ) + ) # we no longer need these _unconnected columns + .drop(columns=['net_generation_mwh_gf_tbl_unconnected', + 'fuel_consumed_mmbtu_unconnected']) ) return 
eia_generators -def _associate_fuel_type_only(gens_asst, pudl_out): +def _associate_fuel_type_only(gen_assoc, gf): """ - Assocaite the records w/o prime movers with fuel cost. + Associate the records w/o prime movers with fuel cost. The 2001 and 2002 generation fuel table does not include any prime mover codes. Because of this, we need to associated these records via their fuel @@ -339,10 +451,9 @@ def _associate_fuel_type_only(gens_asst, pudl_out): Note: 2001 and 2002 eia years are not currently integrated into PUDL. """ + # first fine the gf records that have no PM. gf_grouped = ( - pudl_out.gf_eia923() - .groupby(by=IDX_PM_FUEL, dropna=False) - .sum(min_count=1).reset_index() + gf.groupby(by=IDX_PM_FUEL, dropna=False).sum(min_count=1).reset_index() ) gf_missing_pm = ( gf_grouped[gf_grouped[IDX_PM_FUEL].isnull().any(axis=1)] @@ -350,202 +461,326 @@ def _associate_fuel_type_only(gens_asst, pudl_out): .set_index(IDX_FUEL).add_suffix("_fuel").reset_index() ) - gens_asst = pd.merge( - gens_asst, + gen_assoc = pd.merge( + gen_assoc, gf_missing_pm, how='outer', on=IDX_FUEL, indicator=True ) - gens_asst = _associate_fuel_type_only_wo_matching_fuel_type( - gens_asst, gf_grouped) + gen_assoc = _associate_fuel_type_only_wo_matching_fuel_type(gen_assoc) if gf_missing_pm.empty: logger.info( "No records found with fuel-only records. This is expected.") else: logger.info( - f"{len(gf_missing_pm)/len(gens_asst):.02%} records w/o prime movers now" + f"{len(gf_missing_pm)/len(gen_assoc):.02%} records w/o prime movers now" f" associated for: {gf_missing_pm.report_date.dt.year.unique()}") - return gens_asst + return gen_assoc -def _associate_fuel_type_only_wo_matching_fuel_type(gens_asst, gf_grouped): +def _associate_fuel_type_only_wo_matching_fuel_type(gen_assoc): """ - Associated the missing-pm records that don't have matching fuel types. + Associate the missing-pm records that don't have matching fuel types. 
- There are some generation fuel table records which don't associated with + There are some generation fuel table records which don't associate with any of the energy_source_code's reported in for the generators. For these records, we need to take a step back and associate these records with the full plant. """ idx_plant = ['plant_id_eia', 'report_date'] - gens_asst = pd.merge( - gens_asst, - gens_asst.groupby(by=idx_plant, dropna=False)[['capacity_mw']] + gen_assoc = pd.merge( + gen_assoc, + gen_assoc.groupby(by=idx_plant, dropna=False)[['capacity_mw']] .sum(min_count=1).add_suffix('_plant').reset_index(), on=idx_plant, how='left' ) - gens_asst_w_unassociated = ( + gen_assoc_w_unassociated = ( pd.merge( - gens_asst[ - (gens_asst._merge != 'right_only') - | (gens_asst._merge.isnull()) + gen_assoc[ + (gen_assoc._merge != 'right_only') | (gen_assoc._merge.isnull()) ], - (gens_asst[gens_asst._merge == 'right_only'] + (gen_assoc[gen_assoc._merge == 'right_only'] .groupby(idx_plant) [['net_generation_mwh_fuel', 'fuel_consumed_mmbtu_fuel']] .sum(min_count=1)), on=idx_plant, how='left', - suffixes=('', '_unconnected') + suffixes=('', '_missing_pm') ) + .assign( - net_generation_mwh_gf=lambda x: - x.net_generation_mwh_gf.fillna( - x.net_generation_mwh_fuel - + x.net_generation_mwh_fuel_unconnected.fillna(0) + net_generation_mwh_gf_tbl=lambda x: + x.net_generation_mwh_gf_tbl.fillna( + x.net_generation_mwh_fuel # TODO: what is this? + + x.net_generation_mwh_fuel_missing_pm.fillna(0) ), fuel_consumed_mmbtu=lambda x: x.fuel_consumed_mmbtu.fillna( x.fuel_consumed_mmbtu_fuel - + x.fuel_consumed_mmbtu_fuel_unconnected.fillna(0) + + x.fuel_consumed_mmbtu_fuel_missing_pm.fillna(0) ), ) + .drop(columns=['_merge']) ) - return gens_asst_w_unassociated + return gen_assoc_w_unassociated + + +def prep_alloction_fraction(gen_assoc): + """ + Make flags and aggregations to prepare for the `calc_allocation_ratios()`. 
+ In `calc_allocation_ratios()`, we will break the generators out into four + types - see `calc_allocation_ratios()` docs for details. This function adds + flags for splitting the generators. It also adds -def make_allocation_ratio(gens_asst): - """Generate a ratio to use to allocate net generation by.""" - # generate a flag if the generator exists in - # the generator table (this will be used later on) - # for generating ratios to use to allocate - gens_asst = gens_asst.assign( - exists_in_gen=lambda x: np.where( - x.net_generation_mwh_gen.notnull(), + """ + # flag whether the generator exists in the + # generation table (this will be used later on) + # for calculating ratios to use to allocate net generation + gen_assoc = gen_assoc.assign( + in_g_tbl=lambda x: np.where( + x.net_generation_mwh_g_tbl.notnull(), True, False) ) - gens_gb = gens_asst.groupby(by=IDX_PM_FUEL) + gens_gb = gen_assoc.groupby(by=IDX_PM_FUEL, dropna=False) # get the total values for the merge group # we would use on groupby here with agg but it is much slower # so we're gb-ing twice w/ a merge - # gens_gb.agg({'net_generation_mwh_gen': lambda x: x.sum(min_count=1), + # gens_gb.agg({'net_generation_mwh_g_tbl': lambda x: x.sum(min_count=1), # 'capacity_mw': lambda x: x.sum(min_count=1), - # 'exists_in_gen': 'all'},) + # 'in_g_tbl': 'all'},) gen_pm_fuel = ( - pd.merge( - gens_asst, - gens_gb - [['net_generation_mwh_gen', 'capacity_mw']] - .sum(min_count=1) - .add_suffix('_pm_fuel_total') - .reset_index(), + gen_assoc + .merge( # flag if all generators exist in the generators_eia860 tbl + gens_gb[['in_g_tbl']].all().reset_index(), + on=IDX_PM_FUEL, + suffixes=('', '_all') + ) + .merge( # flag if some generators exist in the generators_eia860 tbl + gens_gb[['in_g_tbl']].any().reset_index(), on=IDX_PM_FUEL, + suffixes=('', '_any') ) + # Net generation and capacity are both proxies that can be used + # to allocate the generation which only shows up in generation_fuel + # Sum them up across the 
whole plant-prime-fuel group so we can tell + # what fraction of the total capacity each generator is. .merge( - gens_gb[['exists_in_gen']].all().reset_index(), + (gens_gb + [['net_generation_mwh_g_tbl', 'capacity_mw']] + .sum(min_count=1) + .add_suffix('_pm_fuel') + .reset_index()), on=IDX_PM_FUEL, - suffixes=('', '_pm_fuel_total') + ) + .assign( + # fill in the missing generation with zeros (this will help ensure + # the calculations to run the fractions in `calc_allocation_ratios` + # can be consistent) + net_generation_mwh_g_tbl=lambda x: x.net_generation_mwh_g_tbl.fillna( + 0) ) ) - gen_pm_fuel_ratio = ( + # Add a column that indicates how much capacity comes from generators that + # report in the generation table, and how much comes only from generators + # that show up in the generation_fuel table. + gen_pm_fuel = ( pd.merge( gen_pm_fuel, - gen_pm_fuel.groupby(by=IDX_PM_FUEL + ['exists_in_gen']) - [['capacity_mw']] - .sum(min_count=1) - .add_suffix('_exist_in_gen_group') - .reset_index(), - on=IDX_PM_FUEL + ['exists_in_gen'], + gen_pm_fuel.groupby(by=IDX_PM_FUEL + ['in_g_tbl'], dropna=False) + [['capacity_mw']].sum(min_count=1) + .add_suffix('_in_g_tbl_group').reset_index(), + on=IDX_PM_FUEL + ['in_g_tbl'], ) ) + return gen_pm_fuel + + +def calc_allocation_fraction(gen_pm_fuel, drop_interim_cols=True): + """ + Make `frac` column to allocate net gen from the generation fuel table. + + There are three main types of generators: + * "all gen": generators of plants which fully report to the + generators_eia860 table. + * "some gen": generators of plants which partially report to the + generators_eia860 table. + * "gf only": generators of plants which do not report at all to the + generators_eia860 table. + * "no pm": generators that have missing prime movers. + + Each different type of generator needs to be treated slightly differently, + but all will end up with a `frac` column that can be used to allocate + the `net_generation_mwh_gf_tbl`. 
+ + Args: + gen_pm_fuel (pandas.DataFrame): output of `prep_alloction_fraction()`. + drop_interim_cols (boolean): True/False flag for dropping interim + columns which are used to generate the `frac` column (they are + mostly interim frac columns and totals of net generation from + various groupings of generators) that are useful for debugging. + Default is True. + + """ + # break out the table into these four different generator types. + no_pm_mask = gen_pm_fuel.net_generation_mwh_fuel_missing_pm.notnull() + no_pm = gen_pm_fuel[no_pm_mask] + all_gen = gen_pm_fuel.loc[gen_pm_fuel.in_g_tbl_all & ~no_pm_mask] + some_gen = gen_pm_fuel.loc[ + gen_pm_fuel.in_g_tbl_any & ~gen_pm_fuel.in_g_tbl_all & + ~no_pm_mask] + gf_only = gen_pm_fuel.loc[~gen_pm_fuel.in_g_tbl_any & ~no_pm_mask] + + logger.info("Ratio calc types: \n" + f" All gens w/in generation table: {len(all_gen)}#, {all_gen.capacity_mw.sum():.2} MW\n" + f" Some gens w/in generation table: {len(some_gen)}#, {some_gen.capacity_mw.sum():.2} MW\n" + f" No gens w/in generation table: {len(gf_only)}#, {gf_only.capacity_mw.sum():.2} MW\n" + f" GF table records have no PM: {len(no_pm)}#") + if len(gen_pm_fuel) != len(all_gen) + len(some_gen) + len(gf_only) + len(no_pm): + raise AssertionError( + 'Error in splitting the gens between records showing up fully, ' + 'partially, or not at all in the generation table.' 
+ ) - gen_pm_fuel_ratio = ( - gen_pm_fuel_ratio.assign( - # we have two options for generating a ratio for allocating - # we'll first try to allocated based on net generation from the gen - # table, but we need to scale that based on capacity of the - # generators the report in net gen table - # and if that isn't there we'll allocate based on capacity - gen_ratio_exist_in_gen_group=lambda x: - x.capacity_mw_exist_in_gen_group / x.capacity_mw_pm_fuel_total, - gen_ratio_net_gen=lambda x: - x.net_generation_mwh_gen / - x.net_generation_mwh_gen_pm_fuel_total, - gen_ratio_net_gen_scaled_by_cap=lambda x: - x.gen_ratio_net_gen * x.gen_ratio_exist_in_gen_group, - gen_ratio_cap=lambda x: x.capacity_mw / x.capacity_mw_pm_fuel_total, - # ratio for the records with a missing prime mover that are - # assocaited at the plant fuel level - gen_ratio_net_gen_fuel=lambda x: - x.net_generation_mwh_gf - / x.net_generation_mwh_gen_fuel_total, - gen_ratio_cap_fuel=lambda x: - x.capacity_mw / x.capacity_mw_fuel_total, - gen_ratio_fuel=lambda x: - np.where(x.gen_ratio_net_gen_fuel.notnull() - | x.gen_ratio_net_gen_fuel != 0, - x.gen_ratio_net_gen_fuel, x.gen_ratio_cap_fuel), - # final ratio - gen_ratio=lambda x: - np.where( - x.net_generation_mwh_fuel.notnull(), - x.gen_ratio_fuel, - np.where( - (x.gen_ratio_net_gen_scaled_by_cap.notnull() - | x.gen_ratio_net_gen_scaled_by_cap != 0), - x.gen_ratio_net_gen_scaled_by_cap, x.gen_ratio_cap)),) + # In the case where we have all of the generation from the generation + # table, we still allocate, because the generation reported in these two + # tables don't always match perfectly + all_gen = all_gen.assign( + frac_net_gen=lambda x: x.net_generation_mwh_g_tbl / + x.net_generation_mwh_g_tbl_pm_fuel, + frac=lambda x: x.frac_net_gen) + # _ = _test_frac(all_gen) + + # a brief explanation of the equations below + # input definitions: + # ng == net generation from the generation table (by generator) + # ngf == net generation from the generation fuel 
table (summed by PM/Fuel) + # ngt == total net generation from the generation table (summed by PM/Fuel) + # + # y = ngt / ngf (fraction of generation reporting in the generation table) + # z = ng / ngt (fraction of generation from generation table by generator) + # g = y * z (fraction of generation reporting in generation table by generator - frac_gen) + + some_gen = some_gen.assign( + # fraction of the generation that should go to the generators that + # report in the generation table + frac_from_g_tbl=lambda x: + x.net_generation_mwh_g_tbl_pm_fuel / x.net_generation_mwh_gf_tbl, + # for records within these mix groups that do have net gen in the + # generation table.. + frac_net_gen=lambda x: + x.net_generation_mwh_g_tbl / # generator based net gen from gen table + x.net_generation_mwh_g_tbl_pm_fuel, + frac_gen=lambda x: + x.frac_net_gen * x.frac_from_g_tbl, + + # fraction of generation that does not show up in the generation table + frac_missing_from_g_tbl=lambda x: + 1 - x.frac_from_g_tbl, + capacity_mw_missing_from_g_tbl=lambda x: np.where( + x.in_g_tbl, 0, x.capacity_mw), + frac_cap=lambda x: + x.frac_missing_from_g_tbl * \ + (x.capacity_mw_missing_from_g_tbl / x.capacity_mw_in_g_tbl_group), + + # the real deal + # this could also be `x.frac_gen + x.frac_cap` because the frac_gen + # should be 0 for any generator that does not have net gen in the g_tbl + # and frac_cap should be 0 for any generator that has net gen in the + # g_tbl. + frac=lambda x: np.where( + x.in_g_tbl, + x.frac_gen, + x.frac_cap) ) + # _ = _test_frac(some_gen) + + # Calculate what fraction of the total capacity is associated with each of + # the generators in the grouping. 
+ gf_only = gf_only.assign( + frac_cap=lambda x: x.capacity_mw / x.capacity_mw_pm_fuel, + frac=lambda x: x.frac_cap) + # _ = _test_frac(gf_only) + + no_pm = no_pm.assign( + # ratio for the records with a missing prime mover that are + # assocaited at the plant fuel level + frac_net_gen_fuel=lambda x: + x.net_generation_mwh_gf_tbl + / x.net_generation_mwh_g_tbl_fuel, + frac_cap_fuel=lambda x: + x.capacity_mw / x.capacity_mw_fuel, + frac=lambda x: np.where( + x.frac_net_gen_fuel.notnull() | x.frac_net_gen_fuel != 0, + x.frac_net_gen_fuel, x.frac_cap_fuel) + ) + + # squish all of these methods back together. + gen_pm_fuel_ratio = pd.concat([all_gen, some_gen, gf_only, no_pm]) + # null out the inf's + gen_pm_fuel_ratio.loc[abs(gen_pm_fuel_ratio.frac) == np.inf] = np.NaN + _ = _test_frac(gen_pm_fuel_ratio) + + # drop all of the columns we needed to get to the `frac` column + if drop_interim_cols: + gen_pm_fuel_ratio = gen_pm_fuel_ratio[ + IDX_PM_FUEL + + ['generator_id', 'energy_source_code_num', 'frac', + 'net_generation_mwh_gf_tbl', 'net_generation_mwh_g_tbl', + 'capacity_mw', 'fuel_consumed_mmbtu']] return gen_pm_fuel_ratio -def _test_gen_ratio(gen_pm_fuel): - # test! Check if each of the IDX_PM_FUEL groups gen_ratio's add up to 1 +def _test_frac(gen_pm_fuel): + # test! 
Check if each of the IDX_PM_FUEL groups frac's add up to 1 ratio_test_pm_fuel = ( gen_pm_fuel.groupby(IDX_PM_FUEL) - [['gen_ratio']].sum(min_count=1) + [['frac', 'net_generation_mwh_g_tbl']].sum(min_count=1) .reset_index() ) ratio_test_fuel = ( gen_pm_fuel.groupby(IDX_FUEL) - [['gen_ratio', 'net_generation_mwh_fuel']].sum(min_count=1) + [['frac', 'net_generation_mwh_fuel']].sum(min_count=1) .reset_index() ) - ratio_test = ( + frac_test = ( pd.merge( ratio_test_pm_fuel, ratio_test_fuel, on=IDX_FUEL, suffixes=("", "_fuel") ) .assign( - gen_ratio=lambda x: np.where( - x.net_generation_mwh_fuel.isnull(), - x.gen_ratio, x.gen_ratio_fuel + frac_pm_fuel=lambda x: x.frac, + frac=lambda x: np.where( + x.frac_pm_fuel.notnull(), + x.frac_pm_fuel, x.frac_fuel, ) ) ) - ratio_test_bad = ratio_test[ - ~np.isclose(ratio_test.gen_ratio, 1) - & ratio_test.gen_ratio.notnull() + frac_test_bad = frac_test[ + ~np.isclose(frac_test.frac, 1) + & frac_test.frac.notnull() ] - if not ratio_test_bad.empty: - raise AssertionError( - f"Ooopsies. You got {len(ratio_test_bad)} records where the " - "'gen_ratio' column isn't adding up to 1 for each 'IDX_PM_FUEL' " - "group. Check 'make_allocation_ratio()'" + if not frac_test_bad.empty: + # raise AssertionError( + warnings.warn( + f"Ooopsies. You got {len(frac_test_bad)} records where the " + "'frac' column isn't adding up to 1 for each 'IDX_PM_FUEL' " + "group. 
Check 'make_allocation_frac()'" ) - return gen_pm_fuel + return frac_test_bad -def _test_gen_pm_fuel_output(gen_pm_fuel, pudl_out): +def _test_gen_pm_fuel_output(gen_pm_fuel, gf, gen): # this is just for testing/debugging def calc_net_gen_diff(gen_pm_fuel, idx): gen_pm_fuel_test = ( @@ -558,7 +793,7 @@ def calc_net_gen_diff(gen_pm_fuel, idx): how='outer' ) .assign(net_generation_mwh_diff=lambda x: - x.net_generation_mwh_gf + x.net_generation_mwh_gf_tbl - x.net_generation_mwh_test) ) return gen_pm_fuel_test @@ -581,18 +816,16 @@ def calc_net_gen_diff(gen_pm_fuel, idx): "off from their 'IDX_PM_FUEL' group") no_cap_gen = gen_pm_fuel_test[ (gen_pm_fuel_test.capacity_mw.isnull()) - & (gen_pm_fuel_test.net_generation_mwh_gen.isnull()) + & (gen_pm_fuel_test.net_generation_mwh_g_tbl.isnull()) ] if len(no_cap_gen) > 15: logger.info( f'Warning: {len(no_cap_gen)} records have no capacity or net gen') - gen_fuel = pudl_out.gf_eia923() - gen = pudl_out.gen_original_eia923() # remove the junk/corrective plants - fuel_net_gen = gen_fuel[ - gen_fuel.plant_id_eia != '99999'].net_generation_mwh.sum() - fuel_consumed = gen_fuel[ - gen_fuel.plant_id_eia != '99999'].fuel_consumed_mmbtu.sum() + fuel_net_gen = gf[ + gf.plant_id_eia != '99999'].net_generation_mwh.sum() + fuel_consumed = gf[ + gf.plant_id_eia != '99999'].fuel_consumed_mmbtu.sum() logger.info( "gen v fuel table net gen diff: " f"{(gen.net_generation_mwh.sum())/fuel_net_gen:.1%}") @@ -602,14 +835,17 @@ def calc_net_gen_diff(gen_pm_fuel, idx): logger.info( "new v fuel table fuel (mmbtu) diff: " f"{(gen_pm_fuel_test.fuel_consumed_mmbtu.sum())/fuel_consumed:.1%}") + + gen_pm_fuel_test = gen_pm_fuel_test.drop( + columns=['net_generation_mwh_test', 'net_generation_mwh_diff']) return gen_pm_fuel_test -def _test_gen_fuel_allocation(pudl_out, gen_allocated, ratio=.05): +def _test_gen_fuel_allocation(gen, gen_allocated, ratio=.05): gens_test = ( pd.merge( gen_allocated, - pudl_out.gen_original_eia923(), + gen, on=IDX_GENS, 
suffixes=('_new', '_og') ) diff --git a/src/pudl/analysis/mcoe.py b/src/pudl/analysis/mcoe.py index b94af0ea63..5b2c0d40bb 100644 --- a/src/pudl/analysis/mcoe.py +++ b/src/pudl/analysis/mcoe.py @@ -21,7 +21,7 @@ def heat_rate_by_unit(pudl_out): - boiler_id The unit_id is associated with generation records based on report_date, - plant_id_eia, and generator_id. Analogously, the unit_id is associtated + plant_id_eia, and generator_id. Analogously, the unit_id is associated with boiler fuel consumption records based on report_date, plant_id_eia, and boiler_id. @@ -33,7 +33,7 @@ def heat_rate_by_unit(pudl_out): - plant_id_eia - unit_id_pudl - net_generation_mwh - - total_heat_content_mmbtu + - fuel_consumed_mmbtu - heat_rate_mmbtu_mwh """ @@ -43,76 +43,135 @@ def heat_rate_by_unit(pudl_out): raise ValueError( "pudl_out must include a frequency for heat rate calculation") - # Create a dataframe containing only the unit-generator mappings: - bga_gens = pudl_out.bga()[['report_date', - 'plant_id_eia', - 'generator_id', - 'unit_id_pudl']].drop_duplicates() - gen = pudl_out.gen_eia923() - - # Merge those unit ids into the generation data: - gen_w_unit = pudl.helpers.merge_on_date_year( - gen, bga_gens, on=['plant_id_eia', 'generator_id']) # Sum up the net generation per unit for each time period: - gen_gb = gen_w_unit.groupby(['report_date', - 'plant_id_eia', - 'unit_id_pudl']) - gen_by_unit = gen_gb.agg({'net_generation_mwh': pudl.helpers.sum_na}) - gen_by_unit = gen_by_unit.reset_index() - - # Create a dataframe containingonly the unit-boiler mappings: - bga_boils = pudl_out.bga()[['report_date', 'plant_id_eia', - 'boiler_id', 'unit_id_pudl']].drop_duplicates() - # Merge those unit ids into the boiler fule consumption data: - bf_w_unit = pudl.helpers.merge_on_date_year( - pudl_out.bf_eia923(), bga_boils, on=['plant_id_eia', 'boiler_id']) + gen_by_unit = ( + pudl_out.gen_eia923() + .groupby(['report_date', 'plant_id_eia', 'unit_id_pudl']) + .agg({'net_generation_mwh': 
pudl.helpers.sum_na}) + .reset_index() + ) + # Sum up all the fuel consumption per unit for each time period: - bf_gb = bf_w_unit.groupby(['report_date', - 'plant_id_eia', - 'unit_id_pudl']) - bf_by_unit = bf_gb.agg({'total_heat_content_mmbtu': pudl.helpers.sum_na}) - bf_by_unit = bf_by_unit.reset_index() + bf_by_unit = ( + pudl_out.bf_eia923() + .groupby(['report_date', 'plant_id_eia', 'unit_id_pudl']) + .agg({'fuel_consumed_mmbtu': pudl.helpers.sum_na}) + .reset_index() + ) # Merge together the per-unit generation and fuel consumption data so we # can calculate a per-unit heat rate: - hr_by_unit = pd.merge(gen_by_unit, bf_by_unit, - on=['report_date', 'plant_id_eia', 'unit_id_pudl'], - validate='one_to_one') - hr_by_unit['heat_rate_mmbtu_mwh'] = \ - hr_by_unit.total_heat_content_mmbtu / hr_by_unit.net_generation_mwh + hr_by_unit = ( + pd.merge( + gen_by_unit, + bf_by_unit, + on=['report_date', 'plant_id_eia', 'unit_id_pudl'], + validate='one_to_one' + ) + .assign( + heat_rate_mmbtu_mwh=lambda x: x.fuel_consumed_mmbtu / x.net_generation_mwh + ) + ) - return hr_by_unit + return pudl.helpers.convert_cols_dtypes( + hr_by_unit, + data_source="eia", + name="hr_by_unit", + ) def heat_rate_by_gen(pudl_out): - """Convert by-unit heat rate to by-generator, adding fuel type & count.""" + """ + Convert per-unit heat rate to by-generator, adding fuel type & count. + + Heat rates really only make sense at the unit level, since input fuel and + output electricity are comingled at the unit level, but it is useful in + many contexts to have that per-unit heat rate associated with each of the + underlying generators, as much more information is available about the + generators. + + To combine the (potentially) more granular temporal information from the + per-unit heat rates with annual generator level attributes, we have to do + a many-to-many merge. 
This can't be done easily with merge_asof(), so we + treat the year and month fields as categorical variables, and do a normal + inner merge that broadcasts monthly dates in one direction, and generator + IDs in the other. + + Returns: + pandas.DataFrame: with columns report_date, plant_id_eia, unit_id_pudl, + generator_id, heat_rate_mmbtu_mwh, fuel_type_code_pudl, fuel_type_count. + The output will have a time frequency corresponding to that of the + input pudl_out. Output data types are set to their canonical values + before returning. + + Raises: + ValueError if pudl_out.freq is None. + + """ # pudl_out must have a freq, otherwise capacity factor will fail and merges # between tables with different frequencies will fail if pudl_out.freq is None: raise ValueError( "pudl_out must include a frequency for heat rate calculation") - bga_gens = pudl_out.bga()[['report_date', - 'plant_id_eia', - 'unit_id_pudl', - 'generator_id']].drop_duplicates() - # Associate those heat rates with individual generators. This also means - # losing the net generation and fuel consumption information for now.
- hr_by_gen = pudl.helpers.merge_on_date_year( + bga_gens = ( + pudl_out.bga_eia860() + .loc[:, ['report_date', 'plant_id_eia', 'unit_id_pudl', 'generator_id']] + .drop_duplicates() + .assign(year=lambda x: x.report_date.dt.year) + .drop("report_date", axis="columns") + ) + + hr_by_unit = ( pudl_out.hr_by_unit() - [['report_date', 'plant_id_eia', - 'unit_id_pudl', 'heat_rate_mmbtu_mwh']], - bga_gens, on=['plant_id_eia', 'unit_id_pudl'] + .assign(year=lambda x: x.report_date.dt.year) + .loc[:, [ + "year", + "report_date", + "plant_id_eia", + "unit_id_pudl", + "heat_rate_mmbtu_mwh" + ]] + ) + + hr_by_gen = ( + pd.merge( + bga_gens, + hr_by_unit, + on=["year", "plant_id_eia", "unit_id_pudl"], + how="inner", + validate="many_to_many", + ) + .loc[:, [ + "report_date", + "plant_id_eia", + "unit_id_pudl", + "generator_id", + "heat_rate_mmbtu_mwh" + ]] + ) + + # Bring in generator specific fuel type & fuel count. + hr_by_gen = pudl.helpers.clean_merge_asof( + left=hr_by_gen, + right=pudl_out.gens_eia860()[[ + 'report_date', + 'plant_id_eia', + 'generator_id', + 'fuel_type_code_pudl', + 'fuel_type_count' + ]], + by={ + "plant_id_eia": "eia", + "generator_id": "eia", + } ) - hr_by_gen = hr_by_gen.drop('unit_id_pudl', axis=1) - # Now bring information about generator fuel type & count - hr_by_gen = pudl.helpers.merge_on_date_year( + + return pudl.helpers.convert_cols_dtypes( hr_by_gen, - pudl_out.gens_eia860()[['report_date', 'plant_id_eia', 'generator_id', - 'fuel_type_code_pudl', 'fuel_type_count']], - on=['plant_id_eia', 'generator_id'] + data_source="eia", + name="hr_by_gen", ) - return hr_by_gen def fuel_cost(pudl_out): @@ -148,44 +207,63 @@ def fuel_cost(pudl_out): # Split up the plants on the basis of how many different primary energy # sources the component generators have: - hr_by_gen = pudl_out.hr_by_gen()[['plant_id_eia', - 'report_date', - 'generator_id', - 'heat_rate_mmbtu_mwh']] - gens = pudl_out.gens_eia860()[['plant_id_eia', - 'report_date', - 
'plant_name_eia', - 'plant_id_pudl', - 'generator_id', - 'utility_id_eia', - 'utility_name_eia', - 'utility_id_pudl', - 'fuel_type_count', - 'fuel_type_code_pudl']] + hr_by_gen = ( + pudl_out.hr_by_gen() + .loc[:, [ + 'plant_id_eia', + 'generator_id', + 'unit_id_pudl', + 'report_date', + 'heat_rate_mmbtu_mwh' + ]] + ) + gens = ( + pudl_out.gens_eia860() + .loc[:, [ + 'plant_id_eia', + 'report_date', + 'plant_name_eia', + 'plant_id_pudl', + 'generator_id', + 'utility_id_eia', + 'utility_name_eia', + 'utility_id_pudl', + 'fuel_type_count', + 'fuel_type_code_pudl' + ]] + ) # We are inner merging here, which means that we don't get every generator # in this output... we only get the ones that show up in hr_by_gen. # See Issue #608 - gen_w_ft = pudl.helpers.merge_on_date_year( - hr_by_gen, gens, - on=['plant_id_eia', 'generator_id'], - how='inner') + gen_w_ft = pudl.helpers.clean_merge_asof( + left=hr_by_gen, + right=gens, + by={ + "plant_id_eia": "eia", + "generator_id": "eia", + } + ) one_fuel = gen_w_ft[gen_w_ft.fuel_type_count == 1] multi_fuel = gen_w_ft[gen_w_ft.fuel_type_count > 1] # Bring the single fuel cost & generation information together for just # the one fuel plants: - one_fuel = pd.merge(one_fuel, - pudl_out.frc_eia923()[['plant_id_eia', - 'report_date', - 'fuel_cost_per_mmbtu', - 'fuel_type_code_pudl', - 'total_fuel_cost', - 'total_heat_content_mmbtu', - 'fuel_cost_from_eiaapi', - ]], - how='left', on=['plant_id_eia', 'report_date']) + one_fuel = pd.merge( + one_fuel, + pudl_out.frc_eia923()[[ + 'plant_id_eia', + 'report_date', + 'fuel_cost_per_mmbtu', + 'fuel_type_code_pudl', + 'total_fuel_cost', + 'fuel_consumed_mmbtu', + 'fuel_cost_from_eiaapi', + ]], + how='left', + on=['plant_id_eia', 'report_date'] + ) # We need to retain the different energy_source_code information from the # generators (primary for the generator) and the fuel receipts (which is # per-delivery), and in the one_fuel case, there will only be a single @@ -223,12 +301,12 @@ def 
fuel_cost(pudl_out): one_fuel_gb = one_fuel.groupby(by=['report_date', 'plant_id_eia']) one_fuel_agg = one_fuel_gb.agg({ 'total_fuel_cost': pudl.helpers.sum_na, - 'total_heat_content_mmbtu': pudl.helpers.sum_na, + 'fuel_consumed_mmbtu': pudl.helpers.sum_na, 'fuel_cost_from_eiaapi': 'any', }) one_fuel_agg['fuel_cost_per_mmbtu'] = \ one_fuel_agg['total_fuel_cost'] / \ - one_fuel_agg['total_heat_content_mmbtu'] + one_fuel_agg['fuel_consumed_mmbtu'] one_fuel_agg = one_fuel_agg.reset_index() one_fuel = pd.merge( one_fuel[['plant_id_eia', 'report_date', 'generator_id', @@ -242,17 +320,25 @@ def fuel_cost(pudl_out): 'fuel_cost_per_mmbtu', 'heat_rate_mmbtu_mwh', 'fuel_cost_from_eiaapi', ]] - fuel_cost = one_fuel.append(multi_fuel, sort=True) - fuel_cost['fuel_cost_per_mwh'] = \ - fuel_cost['fuel_cost_per_mmbtu'] * fuel_cost['heat_rate_mmbtu_mwh'] - fuel_cost = \ - fuel_cost.sort_values(['report_date', 'plant_id_eia', 'generator_id']) + fc = ( + one_fuel.append(multi_fuel, sort=True) + .assign( + fuel_cost_per_mwh=lambda x: x.fuel_cost_per_mmbtu * x.heat_rate_mmbtu_mwh + ) + .sort_values(['report_date', 'plant_id_eia', 'generator_id']) + ) - out_df = gen_w_ft.drop('heat_rate_mmbtu_mwh', axis=1) - out_df = pd.merge(out_df.drop_duplicates(), fuel_cost, - on=['report_date', 'plant_id_eia', 'generator_id']) + out_df = ( + gen_w_ft.drop('heat_rate_mmbtu_mwh', axis=1) + .drop_duplicates() + .merge(fc, on=['report_date', 'plant_id_eia', 'generator_id']) + ) - return out_df + return pudl.helpers.convert_cols_dtypes( + out_df, + data_source="eia", + name="fuel_cost", + ) def capacity_factor(pudl_out, min_cap_fact=0, max_cap_fact=1.5): @@ -275,24 +361,38 @@ def capacity_factor(pudl_out, min_cap_fact=0, max_cap_fact=1.5): ) # Only include columns to be used - gens_eia860 = pudl_out.gens_eia860()[['plant_id_eia', - 'report_date', - 'generator_id', - 'capacity_mw']] + gens_eia860 = ( + pudl_out.gens_eia860() + .loc[:, [ + 'plant_id_eia', + 'report_date', + 'generator_id', + 
'capacity_mw' + ]] + ) - gen = pudl_out.gen_eia923() - gen = gen[['plant_id_eia', 'report_date', - 'generator_id', 'net_generation_mwh']] + gen = ( + pudl_out.gen_eia923() + .loc[:, [ + 'plant_id_eia', + 'report_date', + 'generator_id', + 'net_generation_mwh' + ]] + ) # merge the generation and capacity to calculate capacity factor - capacity_factor = pudl.helpers.merge_on_date_year(gen, - gens_eia860, - on=['plant_id_eia', - 'generator_id'], - how='inner') + cf = pudl.helpers.clean_merge_asof( + left=gen, + right=gens_eia860, + by={ + "plant_id_eia": "eia", + "generator_id": "eia", + } + ) # get a unique set of dates to generate the number of hours - dates = capacity_factor['report_date'].drop_duplicates() + dates = cf['report_date'].drop_duplicates() dates_to_hours = pd.DataFrame( data={'report_date': dates, 'hours': dates.apply( @@ -301,49 +401,63 @@ def capacity_factor(pudl_out, min_cap_fact=0, max_cap_fact=1.5): pd.date_range(d, periods=2, freq=pudl_out.freq)[0]) / pd.Timedelta(hours=1))}) - # merge in the hours for the calculation - capacity_factor = capacity_factor.merge(dates_to_hours, on=['report_date']) - - # actually calculate capacity factor wooo! - capacity_factor['capacity_factor'] = \ - capacity_factor['net_generation_mwh'] / \ - (capacity_factor['capacity_mw'] * capacity_factor['hours']) - - # Replace unrealistic capacity factors with NaN - capacity_factor = pudl.helpers.oob_to_nan( - capacity_factor, ['capacity_factor'], lb=min_cap_fact, ub=max_cap_fact) - - # drop the hours column, cause we don't need it anymore - capacity_factor.drop(['hours'], axis=1, inplace=True) + cf = ( + # merge in the hours for the calculation + cf.merge(dates_to_hours, on=['report_date']) + # actually calculate capacity factor wooo! 
+ .assign( + capacity_factor=lambda x: x.net_generation_mwh / (x.capacity_mw * x.hours) + ) + # Replace unrealistic capacity factors with NaN + .pipe( + pudl.helpers.oob_to_nan, + ['capacity_factor'], + lb=min_cap_fact, + ub=max_cap_fact + ) + .drop(['hours'], axis=1) + ) - return capacity_factor + return pudl.helpers.convert_cols_dtypes( + cf, + data_source="eia", + name="capacity_factor", + ) -def mcoe(pudl_out, - min_heat_rate=5.5, min_fuel_cost_per_mwh=0.0, - min_cap_fact=0.0, max_cap_fact=1.5): +def mcoe( + pudl_out, + min_heat_rate=5.5, + min_fuel_cost_per_mwh=0.0, + min_cap_fact=0.0, + max_cap_fact=1.5, + all_gens=True, +): """ Compile marginal cost of electricity (MCOE) at the generator level. - Use data from EIA 923, EIA 860, and (eventually) FERC Form 1 to estimate - the MCOE of individual generating units. The calculation is performed at - the time resolution, and for the period indicated by the pudl_out object. - that is passed in. + Use data from EIA 923, EIA 860, and (someday) FERC Form 1 to estimate + the MCOE of individual generating units. The calculation is performed over + the range of times and at the time resolution of the input pudl_out object. Args: - pudl_out: a PudlTabl object, specifying the time resolution and - date range for which the calculations should be performed. - min_heat_rate: lowest plausible heat rate, in mmBTU/MWh. Any MCOE - records with lower heat rates are presumed to be invalid, and are - discarded before returning. - min_cap_fact, max_cap_fact: minimum & maximum generator capacity + pudl_out (pudl.output.pudltable.PudlTabl): a PUDL output object + specifying the time resolution and date range for which the + calculations should be performed. + min_heat_rate (float): lowest plausible heat rate, in mmBTU/MWh. Any + MCOE records with lower heat rates are presumed to be invalid, and + are discarded before returning. + min_cap_fact, max_cap_fact (float): minimum & maximum generator capacity factor. 
Generator records with a lower capacity factor will be filtered out before returning. This allows the user to exclude generators that aren't being used enough to have valid. - min_fuel_cost_per_mwh: minimum fuel cost on a per MWh basis that is - required for a generator record to be considered valid. For some + min_fuel_cost_per_mwh (float): minimum fuel cost on a per MWh basis that + is required for a generator record to be considered valid. For some reason there are now a large number of $0 fuel cost records, which previously would have been NaN. + all_gens (bool): if True, include attributes of all generators in the + :ref:`generators_eia860` table, rather than just the generators + which have records in the derived MCOE values. True by default. Returns: pandas.DataFrame: a dataframe organized by date and generator, @@ -351,67 +465,103 @@ def mcoe(pudl_out, cost on a per MWh and MMBTU basis, heat rates, and net generation. """ - # because lots of these input dfs include same info columns, this generates - # drop columnss for fuel_cost. This avoids needing to hard code columns. 
- merge_cols = ['plant_id_eia', 'generator_id', 'report_date'] - drop_cols = [x for x in pudl_out.gens_eia860().columns - if x in pudl_out.fuel_cost().columns and x not in merge_cols] - # start with the generators table so we have all of the generators - mcoe_out = pudl.helpers.merge_on_date_year( - pudl_out.fuel_cost().drop(drop_cols, axis=1), - pudl_out.gens_eia860(), - on=[x for x in merge_cols if x != 'report_date'], - how='inner', + gens_idx = ["report_date", "plant_id_eia", "generator_id"] + + # Bring together all derived values we've calculated in the MCOE process: + mcoe_out = ( + pd.merge( + pudl_out.fuel_cost() + .loc[:, gens_idx + [ + "fuel_cost_from_eiaapi", + "fuel_cost_per_mmbtu", + "heat_rate_mmbtu_mwh", + "fuel_cost_per_mwh" + ]], + pudl_out.capacity_factor() + .loc[:, gens_idx + ["net_generation_mwh", "capacity_factor"]], + on=gens_idx, + how="outer", + ) + # Calculate a couple more derived values: + .assign( + total_mmbtu=lambda x: x.net_generation_mwh * x.heat_rate_mmbtu_mwh, + total_fuel_cost=lambda x: x.total_mmbtu * x.fuel_cost_per_mmbtu, + ) + .pipe( + pudl.helpers.oob_to_nan, + ['heat_rate_mmbtu_mwh'], + lb=min_heat_rate, + ub=None + ) + .pipe( + pudl.helpers.oob_to_nan, + ['fuel_cost_per_mwh'], + lb=min_fuel_cost_per_mwh, + ub=None + ) + .pipe( + pudl.helpers.oob_to_nan, + ['capacity_factor'], + lb=min_cap_fact, + ub=max_cap_fact + ) + # Make sure the merge worked! + .pipe( + pudl.validate.no_null_rows, + df_name="fuel_cost + capacity_factor", + thresh=0.9 + ) + .pipe( + pudl.validate.no_null_cols, + df_name="fuel_cost + capacity_factor" + ) ) - # Bring together the fuel cost and capacity factor dataframes, which - # also include heat rate information. 
- mcoe_out = pd.merge( - mcoe_out, - pudl_out.capacity_factor(min_cap_fact=min_cap_fact, - max_cap_fact=max_cap_fact) - [['report_date', 'plant_id_eia', 'generator_id', - 'capacity_factor', 'net_generation_mwh']], - on=['report_date', 'plant_id_eia', 'generator_id'], - how='outer') - - # Bring the PUDL Unit IDs into the output dataframe so we can see how - # the generators are really grouped. - mcoe_out = pudl.helpers.merge_on_date_year( - mcoe_out, - pudl_out.bga()[['report_date', - 'plant_id_eia', - 'unit_id_pudl', - 'generator_id']].drop_duplicates(), - how='left', - on=['plant_id_eia', 'generator_id']) - # Instead of getting the total MMBTU through this multiplication... we - # could also calculate the total fuel consumed on a per-unit basis, from - # the boiler_fuel table, and then determine what proportion should be - # distributed to each generator based on its heat-rate and net generation. - mcoe_out['total_mmbtu'] = \ - mcoe_out.net_generation_mwh * mcoe_out.heat_rate_mmbtu_mwh - mcoe_out['total_fuel_cost'] = \ - mcoe_out.total_mmbtu * mcoe_out.fuel_cost_per_mmbtu - - first_cols = ['report_date', - 'plant_id_eia', - 'plant_id_pudl', - 'unit_id_pudl', - 'generator_id', - 'plant_name_eia', - 'utility_id_eia', - 'utility_id_pudl', - 'utility_name_eia'] - mcoe_out = pudl.helpers.organize_cols(mcoe_out, first_cols) - mcoe_out = mcoe_out.sort_values( - ['plant_id_eia', 'unit_id_pudl', 'generator_id', 'report_date'] + + # Combine MCOE derived values with all the generator attributes: + mcoe_out = ( + pd.merge( + left=( + pudl_out.gens_eia860() + .assign(year=lambda x: x.report_date.dt.year) + .drop("report_date", axis="columns") + ), + right=mcoe_out.assign(year=lambda x: x.report_date.dt.year), + # This "how" determines whether MCOE or gens_eia860 is the backbone + how="left" if all_gens else "right", + on=["year", "plant_id_eia", "generator_id"] + ) + .astype({"year": str}) + .assign(report_date=lambda x: x.report_date.fillna(pd.to_datetime(x.year))) + 
.drop("year", axis="columns") + .pipe(pudl.validate.no_null_rows, df_name="mcoe_all_gens", thresh=0.9) + ) + + # Organize the dataframe for easier legibility + mcoe_out = ( + mcoe_out.pipe( + pudl.helpers.organize_cols, [ + 'plant_id_eia', + 'generator_id', + 'report_date', + 'unit_id_pudl', + 'plant_id_pudl', + 'plant_name_eia', + 'utility_id_eia', + 'utility_id_pudl', + 'utility_name_eia', + ]) + .sort_values([ + 'plant_id_eia', + 'unit_id_pudl', + 'generator_id', + 'report_date', + ]) + # Set column data types to canonical values: + .pipe( + pudl.helpers.convert_cols_dtypes, + data_source="eia", + name="mcoe", + ) ) - # Filter the output based on the range of validity supplied by the user: - mcoe_out = pudl.helpers.oob_to_nan(mcoe_out, ['heat_rate_mmbtu_mwh'], - lb=min_heat_rate, ub=None) - mcoe_out = pudl.helpers.oob_to_nan(mcoe_out, ['fuel_cost_per_mwh'], - lb=min_fuel_cost_per_mwh, ub=None) - mcoe_out = pudl.helpers.oob_to_nan(mcoe_out, ['capacity_factor'], - lb=min_cap_fact, ub=max_cap_fact) return mcoe_out diff --git a/src/pudl/constants.py b/src/pudl/constants.py index 0bbf5fcb2a..cbf3e7783e 100644 --- a/src/pudl/constants.py +++ b/src/pudl/constants.py @@ -457,20 +457,6 @@ ############################################################################## # EIA 923 Spreadsheet Metadata ############################################################################## -# patterns for matching columns to months: -month_dict_eia923 = {1: '_january$', - 2: '_february$', - 3: '_march$', - 4: '_april$', - 5: '_may$', - 6: '_june$', - 7: '_july$', - 8: '_august$', - 9: '_september$', - 10: '_october$', - 11: '_november$', - 12: '_december$'} -"""dict: A dictionary mapping column numbers (keys) to months (values).""" ############################################################################## # EIA 860 Spreadsheet Metadata @@ -789,6 +775,7 @@ 'BA': 'Energy Storage, Battery', 'BT': 'Turbines Used in a Binary Cycle. 
Including those used for geothermal applications', 'CA': 'Combined-Cycle -- Steam Part', + 'CC': 'Combined-Cycle, Total Unit', 'CE': 'Energy Storage, Compressed Air', 'CP': 'Energy Storage, Concentrated Solar Power', 'CS': 'Combined-Cycle Single-Shaft Combustion Turbine and Steam Turbine share of single', @@ -1145,6 +1132,7 @@ contract_type_eia923 = { 'C': 'Contract - Fuel received under a purchase order or contract with a term of one year or longer. Contracts with a shorter term are considered spot purchases ', 'NC': 'New Contract - Fuel received under a purchase order or contract with duration of one year or longer, under which deliveries were first made during the reporting month', + 'N': 'New Contract - see NC code. This abbreviation existed only in 2008 before being replaced by NC.', 'S': 'Spot Purchase', 'T': 'Tolling Agreement – Fuel received under a tolling agreement (bartering arrangement of fuel for generation)' } @@ -1220,7 +1208,7 @@ 'RS': 'RUS', # Russia 'UK': 'GBR', # United Kingdom of Great Britain 'VZ': 'VEN', # Venezuela - 'OC': 'other_country', + 'OT': 'other_country', 'IM': 'unknown' } """dict: A dictionary mapping coal mine country codes (keys) to ISO-3166-1 three @@ -1495,7 +1483,7 @@ 'eia860': tuple(range(2001, 2020)), 'eia861': tuple(range(1990, 2020)), 'eia923': tuple(range(2001, 2020)), - 'epacems': tuple(range(1995, 2020)), + 'epacems': tuple(range(1995, 2021)), 'epaipm': (None, ), 'ferc1': tuple(range(1994, 2020)), 'ferc714': (None, ), @@ -1517,10 +1505,10 @@ 'years': tuple(range(2001, 2020)) }, 'eia923': { - 'years': tuple(range(2009, 2020)) + 'years': tuple(range(2001, 2020)) }, 'epacems': { - 'years': tuple(range(1995, 2020)), + 'years': tuple(range(1995, 2021)), 'states': tuple(cems_states.keys())}, 'ferc1': { 'years': tuple(range(1994, 2020)) diff --git a/src/pudl/convert/datapkg_to_rst.py b/src/pudl/convert/datapkg_to_rst.py index dcf0e981e0..e9468e5908 100644 --- a/src/pudl/convert/datapkg_to_rst.py +++ 
b/src/pudl/convert/datapkg_to_rst.py @@ -39,12 +39,17 @@ =============================================================================== PUDL Data Dictionary =============================================================================== + +The following data tables have been cleaned and transformed by our ETL process. + {% for resource in resources %} .. _{{ resource.name }}: ------------------------------------------------------------------------------- {{ resource.name }} ------------------------------------------------------------------------------- + +{{ resource.description | wordwrap(78)}} `Browse or query this table in Datasette. `__ .. list-table:: diff --git a/src/pudl/dfc.py b/src/pudl/dfc.py index 5aff9050f6..adb94aaf1f 100644 --- a/src/pudl/dfc.py +++ b/src/pudl/dfc.py @@ -29,7 +29,7 @@ logger = logging.getLogger(__name__) -class TableExists(Exception): +class TableExistsError(Exception): """The table already exists. Either the table already exists in the DataFrameCollection when it is added or the file @@ -92,19 +92,19 @@ def _create_file(self, name: str) -> fsspec.core.OpenFile: """Open the file that should hold the serialized contentes for the table. Raises: - TableExists if the underlying file already exists. + TableExistsError if the underlying file already exists. 
""" filename = self._get_filename(name, self._instance_id) fs, _, _ = fsspec.get_fs_token_paths(filename) if fs.exists(filename): - raise TableExists( + raise TableExistsError( f'{filename} containing serialized data for table {name} already exists.') return fsspec.open(filename, "wb") def store(self, name: str, data: pd.DataFrame): """Adds named dataframe to collection and stores its contents on disk.""" if name in self._table_ids: - raise TableExists(f'Table {name} already present in the DFC.') + raise TableExistsError(f'Table {name} already present in the DFC.') with self._create_file(name) as fd: data.to_pickle(fd) self._table_ids[name] = self._instance_id @@ -115,7 +115,7 @@ def add_reference(self, name: str, table_id: uuid.UUID): This assumes that the data is already present on disk. """ if name in self._table_ids: - raise TableExists(f'Table {name} already exists in this DFC.') + raise TableExistsError(f'Table {name} already exists in this DFC.') self._table_ids[name] = table_id def __getitem__(self, name: str) -> pd.DataFrame: diff --git a/src/pudl/extract/eia923.py b/src/pudl/extract/eia923.py index badf1e8915..58e048331f 100644 --- a/src/pudl/extract/eia923.py +++ b/src/pudl/extract/eia923.py @@ -53,7 +53,7 @@ def process_renamed(df, page, **partition): df = df.rename(columns={'unnamed_0': 'census_division_and_state'}) # Drop the fields with plant_id_eia 99999 or 999999. 
# These are state index - if page != 'stocks': + else: df = df[~df.plant_id_eia.isin([99999, 999999])] return df diff --git a/src/pudl/extract/excel.py b/src/pudl/extract/excel.py index 9664b53f1c..12646c07b1 100644 --- a/src/pudl/extract/excel.py +++ b/src/pudl/extract/excel.py @@ -1,5 +1,4 @@ """Load excel metadata CSV files form a python data package.""" -import csv import importlib.resources import logging @@ -52,6 +51,7 @@ def __init__(self, dataset_name): self._skiprows = self._load_csv(pkg, 'skiprows.csv') self._skipfooter = self._load_csv(pkg, 'skipfooter.csv') self._sheet_name = self._load_csv(pkg, 'tab_map.csv') + self._file_name = self._load_csv(pkg, 'file_map.csv') column_map_pkg = pkg + '.column_maps' self._column_map = {} for res in importlib.resources.contents(column_map_pkg): @@ -78,6 +78,10 @@ def get_skipfooter(self, page, **partition): """Returns number of bottom rows to skip when loading given partition and page.""" return self._skipfooter.at[page, str(self._get_partition_key(partition))] + def get_file_name(self, page, **partition): + """Returns file name of given partition and page.""" + return self._file_name.at[page, str(self._get_partition_key(partition))] + def get_column_map(self, page, **partition): """Returns the dictionary mapping input columns to pudl columns for given partition and page.""" return {v: k for k, v in self._column_map[page].T.loc[str(self._get_partition_key(partition))].to_dict().items() if v != -1} @@ -287,13 +291,4 @@ def excel_filename(self, page, **partition): Return: string name of the xlsx file """ - pkg = f"pudl.package_data.meta.xlsx_maps.{self._dataset_name}" - - with importlib.resources.open_text(pkg, "file_map.csv") as f: - reader = csv.DictReader(f) - - for row in reader: - if row["page"] == page: - return row[str(self.METADATA._get_partition_key(partition))] - - raise ValueError(f"No excel sheet for {partition}, {page}") + return self.METADATA.get_file_name(page, **partition) diff --git 
a/src/pudl/helpers.py b/src/pudl/helpers.py index 84a84a56b5..5c5d85d2f8 100644 --- a/src/pudl/helpers.py +++ b/src/pudl/helpers.py @@ -110,9 +110,11 @@ def clean_eia_counties(df, fixes, state_col="state", county_col="county"): df = df.copy() df[county_col] = ( df[county_col].str.strip() - .str.replace(r"\s+", " ", regex=True) # Condense multiple whitespace chars. + # Condense multiple whitespace chars. + .str.replace(r"\s+", " ", regex=True) .str.replace(r"^St ", "St. ", regex=True) # Standardize abbreviation. - .str.replace(r"^Ste ", "Ste. ", regex=True) # Standardize abbreviation. + # Standardize abbreviation. + .str.replace(r"^Ste ", "Ste. ", regex=True) .str.replace("Kent & New Castle", "Kent, New Castle") # Two counties # Fix ordering, remove comma .str.replace("Borough, Kodiak Island", "Kodiak Island Borough") @@ -211,145 +213,112 @@ def is_doi(doi): return bool(re.match(doi_regex, doi)) -def is_annual(df_year, year_col='report_date'): +def clean_merge_asof( + left, + right, + left_on="report_date", + right_on="report_date", + by={}, +): """ - Determine whether a DataFrame contains consistent annual time-series data. - - Some processes will only work with consistent yearly reporting. This means - if you have two non-contiguous years of data or the datetime reporting is - inconsistent, the process will break. This function attempts to infer the - temporal frequency of the dataframe, or if that is impossible, to at least - see whether the data would be consistent with annual reporting -- e.g. if - there is only a single year of data, it should all have the same date, and - that date should correspond to January 1st of a given year. - - This function is known to be flaky and needs to be re-written to deal with - the edge cases better. + Merge two dataframes having different time report_date frequencies. + + We often need to bring together data which is reported on a monthly basis, + and entity attributes that are reported on an annual basis. 
The + :func:`pandas.merge_asof` is designed to do this, but requires that + dataframes are sorted by the merge keys (``left_on``, ``right_on``, and + ``by.keys()`` here). We also need to make sure that all merge keys have + identical data types in the two dataframes (e.g. ``plant_id_eia`` needs to + be a nullable integer in both dataframes, not a python int in one, and a + nullable :func:`pandas.Int64Dtype` in the other). Note that + :func:`pandas.merge_asof` performs a left merge, so the higher frequency + dataframe **must** be the left dataframe. + + We also force both ``left_on`` and ``right_on`` to be a Datetime using + :func:`pandas.to_datetime` to allow merging dataframes having integer years + with those having datetime columns. + + Because :func:`pandas.merge_asof` searches backwards for the first matching + date, this function only works if the less granular dataframe uses the + convention of reporting the first date in the time period for which it + reports. E.g. annual dataframes need to have January 1st as the date. This + is what happens by default if only a year or year-month are provided to + :func:`pandas.to_datetime` as strings. Args: - df_year (pandas.DataFrame): A pandas DataFrame that might - contain time-series data at annual resolution. - year_col (str): The column of the DataFrame in which the year is - reported. + left (pandas.DataFrame): The higher frequency "data" dataframe. + Typically monthly in our use cases. E.g. ``generation_eia923``. Must + contain ``report_date`` and any columns specified in the ``by`` + argument. + right (pandas.DataFrame): The lower frequency "attribute" dataframe. + Typically annual in our use cases. E.g. ``generators_eia860``. Must + contain ``report_date`` and any columns specified in the ``by`` + argument. + left_on (str): Column in ``left`` to merge on using merge_asof. Default + is ``report_date``.
Must be convertible to a Datetime using + :func:`pandas.to_datetime` + right_on (str): Column in ``right`` to merge on using merge_asof. + Default is ``report_date``. Must be convertible to a Datetime using + :func:`pandas.to_datetime` + by (dict): A dictionary enumerating any columns to merge on other than + ``report_date``. Typically ID columns like ``plant_id_eia``, + ``generator_id`` or ``boiler_id``. The keys of the dictionary are + the names of the columns, and the values are their data source, as + defined in :mod:`pudl.constants` (e.g. ``ferc1`` or ``eia``). The + data source is used to look up the column's canonical data type. Returns: - bool: True if df_year is found to be consistent with continuous annual - time resolution, False otherwise. - - """ - year_index = pd.DatetimeIndex(df_year[year_col].unique()).sort_values() - if len(year_index) >= 3: - date_freq = pd.infer_freq(year_index) - assert date_freq == 'AS-JAN', "infer_freq() not AS-JAN" - elif len(year_index) == 2: - min_year = year_index.min() - max_year = year_index.max() - assert year_index.min().month == 1, "min year not Jan" - assert year_index.min().day == 1, "min day not 1st" - assert year_index.max().month == 1, "max year not Jan" - assert year_index.max().day == 1, "max day not 1st" - delta_year = pd.Timedelta(max_year - min_year) - assert delta_year / pd.Timedelta(days=1) >= 365.0 - assert delta_year / pd.Timedelta(days=1) <= 366.0 - elif len(year_index) == 1: - assert year_index.min().month == 1, "only month not Jan" - assert year_index.min().day == 1, "only day not 1st" - else: - assert False, "Zero dates found!" - - return True + pandas.DataFrame: Merged contents of left and right input dataframes. + Will be sorted by ``left_on`` and any columns specified in ``by``. See + documentation for :func:`pandas.merge_asof` to understand how this kind + of merge works. + Raises: + ValueError: if ``left_on`` or ``right_on`` columns are missing from + their respective input dataframes. 
+ ValueError: if any of the labels referenced in ``by`` are missing from + either the left or right dataframes. -def merge_on_date_year(df_date, df_year, on=(), how='inner', - date_col='report_date', - year_col='report_date'): - """Merge two dataframes based on a shared year. - - Some of our data is annual, and has an integer year column (e.g. FERC 1). - Some of our data is annual, and uses a Date column (e.g. EIA 860), and - some of our data has other temporal resolutions, and uses date columns - (e.g. EIA 923 fuel receipts are monthly, EPA CEMS data is hourly). This - function takes two data frames and merges them based on the year that the - data pertains to. It requires one of the dataframes to have annual - resolution, and allows the annual time to be described as either an integer - year or a Date. The non-annual dataframe must have a Date column. + """ + # Make sure we've got all the required inputs... + if left_on not in left.columns: + raise ValueError(f"Left dataframe has no column {left_on}.") + if right_on not in right.columns: + raise ValueError(f"Right dataframe has no {right_on}.") + missing_left_cols = [col for col in by if col not in left.columns] + if missing_left_cols: + raise ValueError(f"Left dataframe is missing {missing_left_cols}.") + missing_right_cols = [col for col in by if col not in right.columns] + if missing_right_cols: + raise ValueError(f"Left dataframe is missing {missing_right_cols}.") + + def cleanup(df, on, by): + df = df.astype(get_pudl_dtypes(by)) + df.loc[:, on] = pd.to_datetime(df[on]) + df = df.sort_values([on] + list(by.keys())) + return df - By default, it is assumed that both the date and annual columns to be - merged on are called 'report_date' since that's the common case when - bringing together EIA860 and EIA923 data. 
+ return pd.merge_asof( + cleanup(df=left, on=left_on, by=by), + cleanup(df=right, on=right_on, by=by), + left_on=left_on, + right_on=right_on, + by=list(by.keys()), + tolerance=pd.Timedelta("365 days") # Should never match across years. + ) - Args: - df_date: the dataframe with a more granular date column, the label of - which is specified by date_col (report_date by default) - df_year: the dataframe with a column containing annual dates, the label - of which is specified by year_col (report_date by default) - on: The list of columns to merge on, other than the year and date - columns. - date_col: name of the date column to use to find the year to merge on. - Must be a Date. - year_col: name of the year column to merge on. Must be a Date - column with annual resolution. - Returns: - pandas.DataFrame: a dataframe with a date column, but no year - columns, and only one copy of any shared columns that were not part of - the list of columns to be merged on. The values from df1 are the ones - which are retained for any shared, non-merging columns +def get_pudl_dtype(col, data_source): + """Look up a column's canonical data type based on its PUDL data source.""" + return pudl.constants.column_dtypes[data_source][col] - Raises: - ValueError: if the date or year columns are not found, or if the year - column is found to be inconsistent with annual reporting. - - Todo: Right mergers will result in null values in the resulting date - column. The final output includes the date_col from the date_df and thus - if there are any entity records (records being merged on) in the - year_df but not in the date_df, a right merge will result in nulls in - the date_col. And when we drop the 'year_temp' column, the year from - the year_df will be gone. Need to determine how to deal with this. - Should we generate a montly record in each year? Should we generate - full time serires? Should we restrict right merges in this function? 
- """ - if date_col not in df_date.columns.tolist(): - raise ValueError(f"Date column {date_col} not found in df_date.") - if year_col not in df_year.columns.tolist(): - raise ValueError(f"Year column {year_col} not found in df_year.") - if not is_annual(df_year, year_col=year_col): - raise ValueError(f"df_year is not annual, based on column {year_col}.") - - first_date = pd.to_datetime(df_date[date_col].min()) - all_dates = pd.DatetimeIndex(df_date[date_col]).unique().sort_values() - if not len(all_dates) > 0: - raise ValueError("Didn't find any dates in DatetimeIndex.") - if len(all_dates) > 1: - if len(all_dates) == 2: - second_date = all_dates.max() - elif len(all_dates) > 2: - date_freq = pd.infer_freq(all_dates) - rng = pd.date_range(start=first_date, periods=2, freq=date_freq) - second_date = rng[1] - if (second_date - first_date) / pd.Timedelta(days=366) > 1.0: - raise ValueError("Consecutive annual dates >1 year apart.") - - # Create a temporary column in each dataframe with the year - df_year = df_year.copy() - df_date = df_date.copy() - df_year['year_temp'] = pd.to_datetime(df_year[year_col]).dt.year - # Drop the yearly report_date column: this way there won't be duplicates - # and the final df will have the more granular report_date. 
- df_year = df_year.drop([year_col], axis=1) - df_date['year_temp'] = pd.to_datetime(df_date[date_col]).dt.year - - full_on = on + ['year_temp'] - unshared_cols = [col for col in df_year.columns.tolist() - if col not in df_date.columns.tolist()] - cols_to_use = unshared_cols + full_on - - # Merge and drop the temp - merged = pd.merge(df_date, df_year[cols_to_use], how=how, on=full_on) - merged = merged.drop(['year_temp'], axis=1) - - return merged +def get_pudl_dtypes(col_source_dict): + """Look up canonical PUDL data types for columns based on data sources.""" + return { + col: get_pudl_dtype(col, col_source_dict[col]) + for col in col_source_dict + } def organize_cols(df, cols): @@ -656,8 +625,9 @@ def fix_leading_zero_gen_ids(df): .astype(str) .apply(lambda x: re.sub(r'^0+(\d+$)', r'\1', x)) ) - num_fixes = len(df.loc[df["generator_id"].astype(str) != fixed_generator_id]) - logger.info("Fixed %s EIA generator IDs with leading zeros.", num_fixes) + num_fixes = len( + df.loc[df["generator_id"].astype(str) != fixed_generator_id]) + logger.debug("Fixed %s EIA generator IDs with leading zeros.", num_fixes) df = ( df.drop("generator_id", axis="columns") .assign(generator_id=fixed_generator_id) @@ -922,15 +892,14 @@ def convert_cols_dtypes(df, data_source, name=None): bool_cols = {col: col_dtype for col, col_dtype in col_dtypes.items() if col_dtype == pd.BooleanDtype()} - # Grab only the string columns... - string_cols = {col: col_dtype for col, col_dtype - in col_dtypes.items() - if col_dtype == pd.StringDtype()} - # grab all of the non boolean columns non_bool_cols = {col: col_dtype for col, col_dtype in col_dtypes.items() if col_dtype != pd.BooleanDtype()} + # Grab only the string columns... 
+ string_cols = {col: col_dtype for col, col_dtype + in col_dtypes.items() + if col_dtype == pd.StringDtype()} # If/when we have the columns exhaustively typed, we can do it like this, # but right now we don't have the FERC columns done, so we can't: @@ -969,11 +938,10 @@ def convert_cols_dtypes(df, data_source, name=None): if df.utility_id_eia.dtypes is np.dtype('object'): df = df.astype({'utility_id_eia': 'float'}) df = ( - df.replace(to_replace="", value={ - col: pd.NA for col in string_cols}) - .replace(to_replace="nan", value={col: pd.NA for col in string_cols}) - .astype(non_bool_cols) + df.astype(non_bool_cols) .astype(bool_cols) + .replace(to_replace="nan", value={col: pd.NA for col in string_cols}) + .replace(to_replace="", value={col: pd.NA for col in string_cols}) ) # Zip codes are highly coorelated with datatype. If they datatype gets @@ -985,9 +953,9 @@ def convert_cols_dtypes(df, data_source, name=None): zip_cols = [col for col in df.columns if 'zip_code' in col] for col in zip_cols: if '4' in col: - df[col] = zero_pad_zips(df[col], 4) + df.loc[:, col] = zero_pad_zips(df[col], 4) else: - df[col] = zero_pad_zips(df[col], 5) + df.loc[:, col] = zero_pad_zips(df[col], 5) return df @@ -1102,15 +1070,19 @@ def count_records(df, cols, new_count_col_name): cols (iterable) : list of columns to group and count by. new_count_col_name (string) : the name that will be assigned to the column that will contain the count. + Returns: - pandas.DataFrame: dataframe with only the `cols` definted and the - `new_count_col_name`. + pandas.DataFrame: dataframe containing only ``cols`` and + ``new_count_col_name``. + """ - return (df.assign(count_me=1). - groupby(cols). - agg({'count_me': 'count'}). - reset_index(). 
- rename(columns={'count_me': new_count_col_name})) + return ( + df.assign(count_me=1) + .groupby(cols) + .count_me.count() + .reset_index() + .rename(columns={'count_me': new_count_col_name}) + ) def cleanstrings_snake(df, cols): diff --git a/src/pudl/output/eia860.py b/src/pudl/output/eia860.py index bdf13066bb..ea1ff51fa9 100644 --- a/src/pudl/output/eia860.py +++ b/src/pudl/output/eia860.py @@ -1,10 +1,14 @@ """Functions for pulling data primarily from the EIA's Form 860.""" +import logging + import pandas as pd import sqlalchemy as sa import pudl +logger = logging.getLogger(__name__) + def utilities_eia860(pudl_engine, start_date=None, end_date=None): """Pull all fields from the EIA860 Utilities table. @@ -61,10 +65,10 @@ def utilities_eia860(pudl_engine, start_date=None, end_date=None): out_df = ( out_df.assign(report_date=lambda x: pd.to_datetime(x.report_date)) .dropna(subset=["report_date", "utility_id_eia"]) - .astype({ - "utility_id_eia": "Int64", - "utility_id_pudl": "Int64", - }) + .astype(pudl.helpers.get_pudl_dtypes({ + "utility_id_eia": "eia", + "utility_id_pudl": "eia", + })) .drop(['id'], axis='columns') ) first_cols = [ @@ -140,12 +144,12 @@ def plants_eia860(pudl_engine, start_date=None, end_date=None): pd.merge(out_df, utils_eia_df, how='left', on=['utility_id_eia']) .drop(['id'], axis='columns') .dropna(subset=["report_date", "plant_id_eia"]) - .astype({ - "plant_id_eia": "Int64", - "plant_id_pudl": "Int64", - "utility_id_eia": "Int64", - "utility_id_pudl": "Int64", - }) + .astype(pudl.helpers.get_pudl_dtypes({ + "plant_id_eia": "eia", + "plant_id_pudl": "eia", + "utility_id_eia": "eia", + "utility_id_pudl": "eia", + })) ) return out_df @@ -203,17 +207,22 @@ def plants_utils_eia860(pudl_engine, start_date=None, end_date=None): 'utility_id_pudl'] ] .dropna(subset=["report_date", "plant_id_eia", "utility_id_eia"]) - .astype({ - "plant_id_eia": "Int64", - "plant_id_pudl": "Int64", - "utility_id_eia": "Int64", - "utility_id_pudl": "Int64", - }) + 
.astype(pudl.helpers.get_pudl_dtypes({ + "plant_id_eia": "eia", + "plant_id_pudl": "eia", + "utility_id_eia": "eia", + "utility_id_pudl": "eia", + })) ) return out_df -def generators_eia860(pudl_engine, start_date=None, end_date=None): +def generators_eia860( + pudl_engine, + start_date=None, + end_date=None, + unit_ids=False, +): """Pull all fields reported in the generators_eia860 table. Merge in other useful fields including the latitude & longitude of the @@ -235,14 +244,14 @@ def generators_eia860(pudl_engine, start_date=None, end_date=None): end_date (date-like): date-like object, including a string of the form 'YYYY-MM-DD' which will be used to specify the date range of records to be pulled. Dates are inclusive. + pudl_unit_ids (bool): If True, use several heuristics to assign + individual generators to functional units. EXPERIMENTAL. Returns: pandas.DataFrame: A DataFrame containing all the fields of the EIA 860 Generators table. - """ - # pudl_settings = pudl.workspace.setup.get_defaults() - # pudl_engine = sa.create_engine(pudl_settings["pudl_db"]) + """ pt = pudl.output.pudltabl.get_table_meta(pudl_engine) # Almost all the info we need will come from here. gens_eia860_tbl = pt['generators_eia860'] @@ -271,7 +280,6 @@ def generators_eia860(pudl_engine, start_date=None, end_date=None): gens_eia860_tbl.c.report_date <= end_date ) - # breakpoint() gens_eia860 = pd.read_sql(gens_eia860_select, pudl_engine) generators_entity_eia_df = pd.read_sql( generators_entity_eia_select, pudl_engine) @@ -294,11 +302,28 @@ def generators_eia860(pudl_engine, start_date=None, end_date=None): out_df = pd.merge(out_df, pu_eia, on=['report_date', 'plant_id_eia'], how="left") - # ,'plant_name_eia', 'utility_id_eia']) # Drop a few extraneous fields... 
out_df = out_df.drop(['id'], axis='columns') + # Merge in the unit_id_pudl assigned to each generator in the BGA process + # Pull the BGA table and make it unit-generator only: + out_df = pd.merge( + out_df, + boiler_generator_assn_eia860( + pudl_engine, start_date=start_date, end_date=end_date + )[[ + "report_date", + "plant_id_eia", + "generator_id", + "unit_id_pudl", + "bga_source", + ]].drop_duplicates(), + on=["report_date", "plant_id_eia", "generator_id"], + how="left", + validate="m:1", + ) + # In order to be able to differentiate between single and multi-fuel # plants, we need to count how many different simple energy sources there # are associated with plant's generators. This allows us to do the simple @@ -314,13 +339,17 @@ def generators_eia860(pudl_engine, start_date=None, end_date=None): pd.merge(out_df, ft_count, how='left', on=['plant_id_eia', 'report_date']) .dropna(subset=["report_date", "plant_id_eia", "generator_id"]) - .astype({ - "plant_id_eia": "Int64", - "plant_id_pudl": "Int64", - "utility_id_eia": "Int64", - "utility_id_pudl": "Int64", - }) + .astype(pudl.helpers.get_pudl_dtypes({ + "plant_id_eia": "eia", + "plant_id_pudl": "eia", + "unit_id_pudl": "eia", + "utility_id_eia": "eia", + "utility_id_pudl": "eia", + })) ) + # Augment those base unit_id_pudl values using heuristics, see below. 
+ if unit_ids: + out_df = assign_unit_ids(out_df) first_cols = [ 'report_date', @@ -337,6 +366,7 @@ def generators_eia860(pudl_engine, start_date=None, end_date=None): out_df = ( pudl.helpers.organize_cols(out_df, first_cols) .sort_values(['report_date', 'plant_id_eia', 'generator_id']) + .pipe(pudl.helpers.convert_cols_dtypes, data_source="eia") ) return out_df @@ -437,12 +467,12 @@ def ownership_eia860(pudl_engine, start_date=None, end_date=None): "generator_id", "owner_utility_id_eia", ]) - .astype({ - "plant_id_eia": "Int64", - "plant_id_pudl": "Int64", - "utility_id_eia": "Int64", - "utility_id_pudl": "Int64", - }) + .astype(pudl.helpers.get_pudl_dtypes({ + "plant_id_eia": "eia", + "plant_id_pudl": "eia", + "utility_id_eia": "eia", + "utility_id_pudl": "eia", + })) ) first_cols = [ @@ -464,3 +494,567 @@ def ownership_eia860(pudl_engine, start_date=None, end_date=None): ) return out_df + + +################################################################################ +# Temporary integration of more complete unit_id_pudl assignments +# Eventually this should go into the boiler-generator-association process +# and these IDs should probably live in the BGA table with the other +# unit_id_pudl values derived from the BGA table and other heuristics. +################################################################################ +def assign_unit_ids(gens_df): + """ + Group generators into operational units using various heuristics. + + Splits a few columns off from the big generator dataframe and uses several + heuristic functions to fill in missing unit_id_pudl values beyond those that + are generated in the boiler generator association process. Then merges the + new unit ID values back in to the generators dataframe. + + Args: + gens_df (pandas.DataFrame): An EIA generator table. 
Must contain at + least the columns: report_date, plant_id_eia, generator_id, + unit_id_pudl, bga_source, fuel_type_code_pudl, prime_mover_code, + + Returns: + pandas.DataFrame: Returned dataframe should only vary from the input in + that some NA values in the ``unit_id_pudl`` and ``bga_source`` columns + have been filled in with real values. + + Raises: + ValueError: If the input dataframe is missing required columns. + ValueError: If any generator is associated with more than one unit_id_pudl. + AssertionError: If row or column indices are changed. + AssertionError: If pre-existing unit_id_pudl or bga_source values are altered. + AssertionError: If contents of any other columns are altered at all. + + """ + required_cols = [ + "plant_id_eia", + "generator_id", + "report_date", + "unit_id_pudl", + "bga_source", + "fuel_type_code_pudl", + "prime_mover_code", + ] + if not set(required_cols).issubset(gens_df.columns): + missing_cols = set(required_cols).difference(gens_df.columns) + errstr = f"Input DataFrame missing required columns: {missing_cols}." + raise ValueError(errstr) + + unit_ids = ( + gens_df.loc[:, required_cols] + # Forward and back fill preexisting Unit IDs: + .pipe(fill_unit_ids) + # Assign Unit IDs to the CT+CA CC generators: + .pipe(assign_cc_unit_ids) + # For whole-combined cycle (CC) and single-shaft combined cycle (CS) + # units, we give each generator their own unit ID. We do the same for + # internal combustion and simple-cycle gas combustion turbines. + .pipe( + assign_single_gen_unit_ids, + prime_mover_codes=["CC", "CS", "GT", "IC"] + ) + # Nuclear units don't report in boiler_fuel_eia923 or generation_eia923 + # Their fuel consumption is reported as mmbtu in generation_fuel_eia923 + # Their net generation also only shows up in generation_fuel_eia923 + # The generation_fuel_eia923 table records a "nuclear_unit_id" which + # appears to be the same as the associated generator_id. 
However, we + # can't use that as a unit_id_pudl since it might have a collision with + # other already assigned unit_id_pudl values in the same plant for + # generators with other fuel types. Thus we still need to assign them + # a fuel-and-prime-mover based unit ID here. For now ALL nuclear plants + # use steam turbines. + .pipe( + assign_single_gen_unit_ids, + prime_mover_codes=["ST"], + fuel_type_code_pudl="nuclear", + label_prefix="nuclear", + ) + # In these next 4 assignments, we lump together all steam turbine (ST) + # generators that have a consistent simplified fuel_type_code_pudl + # across all years within a given plant into the same unit, since we + # won't be able to distinguish them in the generation_fuel_eia923 + # table. This will lump together solid fuels like BIT, LIG, SUB, PC etc. + # under "coal". There are a few cases in which a generator has truly + # changed its fuel type, e.g. coal-to-gas conversions but these are + # rare and insubstantial. They will not be assigned a Unit ID in this + # process. 
Non-fuel steam generation is also left out (geothermal & + # solar thermal) + .pipe( + assign_prime_fuel_unit_ids, + prime_mover_code="ST", + fuel_type_code_pudl="coal" + ) + .pipe( + assign_prime_fuel_unit_ids, + prime_mover_code="ST", + fuel_type_code_pudl="oil" + ) + .pipe( + assign_prime_fuel_unit_ids, + prime_mover_code="ST", + fuel_type_code_pudl="gas" + ) + .pipe( + assign_prime_fuel_unit_ids, + prime_mover_code="ST", + fuel_type_code_pudl="waste" + ) + # Retain only the merge keys and output columns + .loc[:, [ + "plant_id_eia", # Merge key + "generator_id", # Merge key + "report_date", # Merge key + "unit_id_pudl", # Output column + "bga_source" # Output column + ]] + ) + # Check that each generator is only ever associated with a single unit, + # at least within the codes that we've just assigned -- the Unit IDs that + # are based on the EIA boiler-generator-association or other matching + # methods could legitimately specify different units for generators over + # time -- which could impact the forward-/back-filled IDs as well: + old_codes = list(gens_df.bga_source.unique()) + ["bfill_units", "ffill_units"] + gens_have_unique_unit = ( + unit_ids[~unit_ids.bga_source.isin(old_codes)] + .groupby(["plant_id_eia", "generator_id"])["unit_id_pudl"] + .nunique() <= 1 # nunique() == 0 when there are only NA values. + ).all() + if not gens_have_unique_unit: + errstr = "Some generators are associated with more than one unit_id_pudl." + raise ValueError(errstr) + + # Use natural composite primary key as the index + gens_idx = ["plant_id_eia", "generator_id", "report_date"] + unit_ids = unit_ids.set_index(gens_idx).sort_index() + gens_df = gens_df.set_index(gens_idx).sort_index() + + # Check that our input DataFrame and unit IDs have identical row indices + # This is a dumb hack b/c set_index() doesn't preserve index data types + # under some circumstances, and so we have "object" and "int64" types + # being used for plant_id_eia at this point, fml. 
Really this should just + # be assert_index_equal() for the two df indices: + pd.testing.assert_frame_equal( + unit_ids.reset_index()[gens_idx], + gens_df.reset_index()[gens_idx] + ) + # Verify that anywhere out_df has a unit_id_pudl, it's identical in unit_ids + pd.testing.assert_series_equal( + gens_df.unit_id_pudl.dropna(), + unit_ids.unit_id_pudl.loc[gens_df.unit_id_pudl.dropna().index] + ) + # Verify that anywhere out_df has a bga_source, it's identical in unit_ids + pd.testing.assert_series_equal( + gens_df.bga_source.dropna(), + unit_ids.bga_source.loc[gens_df.bga_source.dropna().index] + ) + # We know that the indices are identical + # We know that we aren't going to overwrite anything that isn't NA + # Thus we should be able to just assign these values straight across. + unit_cols = ["unit_id_pudl", "bga_source"] + gens_df.loc[:, unit_cols] = unit_ids[unit_cols] + + return gens_df.reset_index() + + +def fill_unit_ids(gens_df): + """ + Back and forward fill Unit IDs for each plant / gen combination. + + This routine assumes that the mapping of generators to units is constant + over time, and extends those mappings into years where no boilers have + been reported -- since in the BGA we can only connect generators to each + other if they are both connected to a boiler. + + Prior to 2014, combined cycle units didn't report any "boilers" but in + latter years, they have been given "boilers" that correspond to their + generators, so that all of their fuel consumption is recorded alongside + that of other types of generators. + + The bga_source field is set to "bfill_units" for those that were backfilled, + and "ffill_units" for those that were forward filled. + + Note: We could back/forward fill the boiler IDs prior to the BGA process and + we ought to get consistent units across all the years that are the same as + what we fill in here. We could also back/forward fill boiler IDs and Unit + IDs after the fact, and we *should* get the same result. 
this will address + many currently "boilerless" CCNG units that use generator ID as boiler ID in + the latter years. We could try and apply this more generally, but in cases + of generator IDs that haven't been used as boiler IDs, it would break the + foreign key relationship with the boiler table, unless we added them there + too, which seems like too much deep muddling. + + Args: + gens_df (pandas.DataFrame): An generators_eia860 dataframe, which must + contain columns: report_date, plant_id_eia, generator_id, + unit_id_pudl, bga_source. + + Returns: + pandas.DataFrame: with the same columns as the input dataframe, but + having some NA values filled in for both the unit_id_pudl and bga_source + columns. + + """ + # forward and backward fill the unit IDs + gen_ids = ["plant_id_eia", "generator_id"] + gens_df = gens_df.sort_values(["report_date", "plant_id_eia", "generator_id"]) + + bfill_units = gens_df.groupby(gen_ids)["unit_id_pudl"].bfill() + bfill_idx = (bfill_units.notnull()) & (gens_df.unit_id_pudl.isnull()) + gens_df.loc[bfill_idx, "bga_source"] = "bfill_units" + gens_df.loc[bfill_idx, "unit_id_pudl"] = bfill_units.loc[bfill_idx] + + ffill_units = gens_df.groupby(gen_ids)["unit_id_pudl"].ffill() + ffill_idx = (ffill_units.notnull()) & (gens_df.unit_id_pudl.isnull()) + gens_df.loc[ffill_idx, "bga_source"] = "ffill_units" + gens_df.loc[ffill_idx, "unit_id_pudl"] = ffill_units.loc[ffill_idx] + gens_df["bga_source"] = gens_df["bga_source"].astype(pd.StringDtype()) + + return gens_df + + +def max_unit_id_by_plant(gens_df): + """ + Identify the largest unit ID associated with each plant so we don't overlap. + + The PUDL Unit IDs are sequentially assigned integers. To assign a new ID, we + need to know the largest existing Unit ID within a plant. This function + calculates that largest existing ID, or uses zero, if no Unit IDs are set + within the plant. 
+ + Note that this calculation depends on having all of the pre-existing + generators and units still available in the dataframe! + + Args: + gens_df (pandas.DataFrame): A generators_eia860 dataframe containing at + least the columns plant_id_eia and unit_id_pudl. + + Returns: + pandas.DataFrame: Having two columns: plant_id_eia and max_unit_id_pudl + in which each row should be unique. + + """ + return ( + gens_df[["plant_id_eia", "unit_id_pudl"]] + .drop_duplicates() + .groupby("plant_id_eia").agg({"unit_id_pudl": max}) + .fillna(0) + .rename(columns={"unit_id_pudl": "max_unit_id_pudl"}) + .reset_index() + ) + + +def _append_masked_units(gens_df, row_mask, unit_ids, on): + """ + Replace rows with new PUDL Unit IDs in the original dataframe. + + Merges the newly assigned Unit IDs found in ``unit_ids`` into the + ``gens_df`` dataframe, but only for those rows which are selected by the + boolean ``row_mask``. Merges using the column or columns specified by + ``on``. This operation should only result in changes to the values of + ``unit_id_pudl`` and ``bga_source`` in the output dataframe. All of + ``gens_df``, ``unit_ids`` and ``row_mask`` must be similarly indexed for + this to work. + + Args: + gens_df (pandas.DataFrame): a gens_eia860 based dataframe. + row_mask (boolean mask): A boolean array indicating which records + in ``gens_df`` should be replaced using values from ``unit_ids``. + unit_ids (pandas.DataFrame): A dataframe containing newly assigned + ``unit_id_pudl`` values to be integrated into ``gens_df``. + on (str or list): Column or list of columns to merge on. 
+ + Returns: + pandas.DataFrame: + + """ + return gens_df.loc[~row_mask].append( + gens_df.loc[row_mask] + .drop(["unit_id_pudl", "bga_source"], axis="columns") + .merge( + unit_ids, + on=on, + how="left", + validate="many_to_one", + ) + ) + + +def assign_single_gen_unit_ids( + gens_df, + prime_mover_codes, + fuel_type_code_pudl=None, + label_prefix="single" +): + """ + Assign a unique PUDL Unit ID to each generator of a given prime mover type. + + Calculate the maximum pre-existing PUDL Unit ID within each plant, and + assign each as of yet unidentified distinct generator within each plant + with an incrementing integer unit_id_pudl, beginning with 1 + the previous + maximum unit_id_pudl found in that plant. Mark that generator with a label + in the bga_source column consisting of label_prefix + the prime mover code. + + If fuel_type_code_pudl is not None, then only assign new Unit IDs to those + generators having the specified fuel type code, and use that fuel type code + as the label prefix, e.g. "coal_st" for a coal-fired steam turbine. + + Only generators having NA unit_id_pudl will be assigned a new ID. + + Args: + gens_df (pandas.DataFrame): A collection of EIA generator records. + Must include the ``plant_id_eia``, ``generator_id`` and + ``prime_mover_code`` and ``unit_id_pudl`` columns. + prime_mover_codes (list): List of prime mover codes for which we are + attempting to assign simple Unit IDs. + fuel_type_code_pudl (str, None): If not None, then limit the records + assigned a unit_id to those that have the specified + fuel_type_code_pudl (e.g. "coal", "gas", "oil", "nuclear") + label_prefix (str): String to use in labeling records as to how their + unit_id_pudl was set. Will be concatenated with the prime mover + code. + + Returns: + pandas.DataFrame: A new dataframe with the same rows and columns as + were passed in, but with the unit_id_pudl and bga_source columns updated + to reflect the newly assigned Unit IDs. 
+ + """ + if fuel_type_code_pudl is not None: + # Need to make this only apply to consistent inter-year fuel types. + fuel_type_mask = gens_df.fuel_type_code_pudl == fuel_type_code_pudl + else: + fuel_type_mask = True + + # Only alter the rows lacking Unit IDs and matching our target rows + row_mask = ( + (gens_df.prime_mover_code.isin(prime_mover_codes)) + & (gens_df.unit_id_pudl.isnull()) + & (fuel_type_mask) + ) + # We only need a few columns to make these assignments. + cols = ["plant_id_eia", "generator_id", "unit_id_pudl", "prime_mover_code"] + + logger.info( + "Selected %s %s records lacking Unit IDs from %s records overall. ", + row_mask.sum(), prime_mover_codes, len(gens_df) + ) + + unit_ids = ( + gens_df.loc[row_mask, cols] + .drop_duplicates() + .merge( + max_unit_id_by_plant(gens_df), + on="plant_id_eia", + how="left", + validate="many_to_one", + ) + # Assign new unit_id_pudl values based on number of distinct generators: + .assign( + unit_id_pudl=lambda x: ( + x.groupby("plant_id_eia")["generator_id"] + .cumcount() + x.max_unit_id_pudl + 1 + ), + bga_source=lambda x: label_prefix + "_" + x.prime_mover_code.str.lower(), + ) + .drop(["max_unit_id_pudl", "prime_mover_code"], axis="columns") + ) + # Split original dataframe based on row_mask, and merge in the new IDs and + # labels only on the subset of the dataframe matching our row_mask: + return _append_masked_units( + gens_df, row_mask, unit_ids, on=["plant_id_eia", "generator_id"] + ) + + +def assign_cc_unit_ids(gens_df): + """ + Assign PUDL Unit IDs for combined cycle generation units. + + This applies only to combined cycle units reported as a combination of CT + and CA prime movers. All CT and CA generators within a plant that do not + already have a unit_id_pudl assigned will be given the same unit ID. 
The + ``bga_source`` column is set to one of several flags indicating what type + of arrangement was found: + + * ``orphan_ct`` (zero CA gens, 1+ CT gens) + * ``orphan_ca`` (zero CT gens, 1+ CA gens) + * ``one_ct_one_ca_inferred`` (1 CT, 1 CA) + * ``one_ct_many_ca_inferred`` (1 CT, 1+ CA) + * ``many_ct_one_ca_inferred`` (1+ CT, 1 CA) + * ``many_ct_many_ca_inferred`` (1+ CT, 1+ CA) + + Orphaned generators are still assigned a ``unit_id_pudl`` so that they can + potentially be associated with other generators in the same unit across + years. It's likely that these orphans are a result of mislabled or missing + generators. Note that as generators are added or removed over time, the + flags associated with each generator may change, even though it remains + part of the same inferred unit. + + Returns: + pandas.DataFrame + + """ + # Calculate the largest preexisting unit_id_pudl within each plant + max_unit_ids = max_unit_id_by_plant(gens_df) + + cc_missing_units = gens_df[ + (gens_df.unit_id_pudl.isna()) + & gens_df.prime_mover_code.isin(["CT", "CA"]) + ] + # On a per-plant, per-year basis, count up the number of CT and CA generators. + # Only look at those which don't already have a unit ID assigned: + cc_pm_counts = ( + cc_missing_units + .groupby(["plant_id_eia", "report_date"])["prime_mover_code"] + .value_counts().unstack(fill_value=0).astype(int).reset_index() + ) + cc_pm_counts.columns.name = None + + # Bring the max unit ID and PM counts into the DF so we can select and + # assign based on them. We're using the cc_missing_units and a temporary + # dataframe here to avoid interference from the CT & CA generators + # that do already have unit IDs assigned to them in gens_df. + tmp_df = ( + cc_missing_units + .merge( + max_unit_ids, + on="plant_id_eia", + how="left", + validate="many_to_one", + ) + .merge( + cc_pm_counts, + on=["plant_id_eia", "report_date"], + how="left", + validate="many_to_one", + ) + ) + + # Assign the new Unit IDs. 
+ # All CA and CT units get assigned to the same unit within a plant: + tmp_df["unit_id_pudl"] = tmp_df["max_unit_id_pudl"] + 1 + + # Assign the orphan flags + tmp_df.loc[tmp_df.CA == 0, "bga_source"] = "orphan_ct" + tmp_df.loc[tmp_df.CT == 0, "bga_source"] = "orphan_ca" + # The orphan flags should only have been applied to generators that had + # at least one prime mover of the orphaned type. Just checking... + assert (tmp_df.loc[tmp_df.bga_source == "orphan_ct", "CT"] > 0).all() + assert (tmp_df.loc[tmp_df.bga_source == "orphan_ca", "CA"] > 0).all() + + # Assign flags for various arrangements of CA and CT generators + tmp_df.loc[((tmp_df.CT == 1) & (tmp_df.CA == 1)), + "bga_source"] = "one_ct_one_ca_inferred" + tmp_df.loc[((tmp_df.CT == 1) & (tmp_df.CA > 1)), + "bga_source"] = "one_ct_many_ca_inferred" + tmp_df.loc[((tmp_df.CT > 1) & (tmp_df.CA == 1)), + "bga_source"] = "many_ct_one_ca_inferred" + tmp_df.loc[((tmp_df.CT > 1) & (tmp_df.CA > 1)), + "bga_source"] = "many_ct_many_ca_inferred" + + # Align the indices of the two dataframes so we can assign directly + tmp_df = tmp_df.set_index(["plant_id_eia", "generator_id", "report_date"]) + out_df = gens_df.set_index(["plant_id_eia", "generator_id", "report_date"]) + out_df.loc[tmp_df.index, ["unit_id_pudl", "bga_source"] + ] = tmp_df[["unit_id_pudl", "bga_source"]] + + return out_df.reset_index() + + +def assign_prime_fuel_unit_ids(gens_df, prime_mover_code, fuel_type_code_pudl): + """ + Assign a PUDL Unit ID to all generators with a given prime mover and fuel. + + Within each plant, assign a Unit ID to all generators that don't have one, + and that share the same `fuel_type_code_pudl` and `prime_mover_code`. This + is especially useful for differentiating between different types of steam + turbine generators, as there are so many different kinds of steam turbines, + and the only characteristic we have to differentiate between them in this + context is the fuel they consume. E.g. 
nuclear, geothermal, solar thermal, + natural gas, diesel, and coal can all run steam turbines, but it doesn't + make sense to lump those turbines together into a single unit just because + they are located at the same plant. + + This routine only assigns a PUDL Unit ID to generators that have a + consistently reported value of `fuel_type_code_pudl` across all of the years + of data in `gens_df`. This consistency is important because otherwise the + prime-fuel based unit assignment could put the same generator into different + units in different years, which is currently not compatible with our concept + of "units." + + Args: + gens_df (pandas.DataFrame): A collection of EIA generator records. + Must include the ``plant_id_eia``, ``generator_id`` and + ``prime_mover_code`` and ``unit_id_pudl`` columns. + prime_mover_code (str): List of prime mover codes for which we are + attempting to assign simple Unit IDs. + fuel_type_code_pudl (str): If not None, then limit the records + assigned a unit_id to those that have the specified + fuel_type_code_pudl (e.g. "coal", "gas", "oil", "nuclear") + + Returns: + pandas.DataFrame: + + """ + # Find generators with a consistent fuel_type_code_pudl across all years. + consistent_fuel = ( + gens_df.groupby(["plant_id_eia", "generator_id"])["fuel_type_code_pudl"] + .transform(lambda x: x.nunique()) + ) == 1 + # This mask defines the generators generators we are going to alter: + row_mask = ( + (gens_df.prime_mover_code == prime_mover_code) + & (gens_df.unit_id_pudl.isna()) + & (gens_df.fuel_type_code_pudl == fuel_type_code_pudl) + & (consistent_fuel) + ) + + # We only need a few columns to make these assignments. 
+ cols = ["plant_id_eia", "generator_id", "unit_id_pudl"] + + logger.info( + "Selected %s %s records lacking Unit IDs burning %s from %s records overall.", + row_mask.sum(), prime_mover_code, fuel_type_code_pudl, len(gens_df) + ) + + unit_ids = ( + gens_df.loc[row_mask, cols] + .drop_duplicates() + .merge( + max_unit_id_by_plant(gens_df), + on="plant_id_eia", + how="left", + validate="many_to_one", + ) + # Assign all selected generators within each plant the next PUDL Unit ID. + .assign( + unit_id_pudl=lambda x: x.max_unit_id_pudl + 1, + bga_source=lambda x: fuel_type_code_pudl + "_" + prime_mover_code.lower(), + ) + .drop(["max_unit_id_pudl"], axis="columns") + ) + + # Split original dataframe based on row_mask, and merge in the new IDs and + # labels only on the subset of the dataframe matching our row_mask: + out_df = _append_masked_units( + gens_df, row_mask, unit_ids, on=["plant_id_eia", "generator_id"] + ) + + # Find generators with inconsistent fuel_type_code_pudl so we can label them + inconsistent_fuel = ( + out_df.groupby(["plant_id_eia", "generator_id"])["fuel_type_code_pudl"] + .transform(lambda x: x.nunique()) + ) > 1 + + inconsistent_fuel_mask = ( + (out_df.prime_mover_code == prime_mover_code) + & (out_df.unit_id_pudl.isna()) + & (out_df.fuel_type_code_pudl == fuel_type_code_pudl) + & (inconsistent_fuel) + ) + out_df.loc[inconsistent_fuel_mask, "bga_source"] = ( + "inconsistent_" + fuel_type_code_pudl + "_" + prime_mover_code.lower() + ) + return out_df diff --git a/src/pudl/output/eia923.py b/src/pudl/output/eia923.py index 830e64f5d6..739bc7deae 100644 --- a/src/pudl/output/eia923.py +++ b/src/pudl/output/eia923.py @@ -130,19 +130,23 @@ def generation_fuel_eia923(pudl_engine, freq=None, 'utility_name_eia', ] out_df = ( - pudl.helpers.merge_on_date_year(gf_df, pu_eia, on=['plant_id_eia']) + pudl.helpers.clean_merge_asof( + left=gf_df, + right=pu_eia, + by={"plant_id_eia": "eia"} + ) # Drop any records where we've failed to get the 860 data merged 
in... .dropna(subset=[ 'plant_id_eia', 'utility_id_eia', ]) .pipe(pudl.helpers.organize_cols, first_cols) - .astype({ - "plant_id_eia": "Int64", - "plant_id_pudl": "Int64", - "utility_id_eia": "Int64", - "utility_id_pudl": "Int64", - }) + .astype(pudl.helpers.get_pudl_dtypes({ + "plant_id_eia": "eia", + "plant_id_pudl": "eia", + "utility_id_eia": "eia", + "utility_id_pudl": "eia", + })) ) return out_df @@ -170,7 +174,7 @@ def fuel_receipts_costs_eia923(pudl_engine, freq=None, - ``fuel_qty_units`` (sum) - ``fuel_cost_per_mmbtu`` (weighted average) - ``total_fuel_cost`` (sum) - - ``total_heat_content_mmbtu`` (sum) + - ``fuel_consumed_mmbtu`` (sum) - ``heat_content_mmbtu_per_unit`` (weighted average) - ``sulfur_content_pct`` (weighted average) - ``ash_content_pct`` (weighted average) @@ -236,9 +240,10 @@ def fuel_receipts_costs_eia923(pudl_engine, freq=None, frc_df = pd.read_sql(frc_select, pudl_engine) - frc_df = pd.merge(frc_df, cmi_df, - how='left', - on='mine_id_pudl') + frc_df = ( + frc_df.merge(cmi_df, how='left', on='mine_id_pudl') + .rename(columns={"state": "mine_state"}) + ) cols_to_drop = ['id', 'mine_id_pudl'] frc_df = frc_df.drop(cols_to_drop, axis=1) @@ -248,29 +253,32 @@ def fuel_receipts_costs_eia923(pudl_engine, freq=None, logger.info('filling in fuel cost NaNs EIA APIs monthly state averages') fuel_costs_avg_eiaapi = get_fuel_cost_avg_eiaapi( FUEL_COST_CATEGORIES_EIAAPI) - # add the state from the plants table - frc_df = ( - pudl.helpers.merge_on_date_year( - frc_df, - pudl.output.eia860.plants_eia860( - pudl_engine, start_date=start_date, end_date=end_date)[ - ['report_date', 'plant_id_eia', 'state']], - on=['plant_id_eia', ], how='left') - .merge(fuel_costs_avg_eiaapi, - on=['report_date', 'state', 'fuel_type_code_pudl'], - how='left') - .assign( - # add a flag column to note if we are using the api data - fuel_cost_from_eiaapi=lambda x: - np.where(x.fuel_cost_per_mmbtu.isnull() - & x.fuel_cost_per_unit.notnull(), - True, False), - 
fuel_cost_per_mmbtu=lambda x: - np.where(x.fuel_cost_per_mmbtu.isnull(), - (x.fuel_cost_per_unit - / x.heat_content_mmbtu_per_unit), - x.fuel_cost_per_mmbtu) - ) + # Merge to bring in states associated with each plant: + plant_states = pd.read_sql( + "SELECT plant_id_eia, state FROM plants_entity_eia;", pudl_engine + ) + frc_df = frc_df.merge(plant_states, on="plant_id_eia", how="left") + + # Merge in monthly per-state fuel costs from EIA based on fuel type. + frc_df = frc_df.merge( + fuel_costs_avg_eiaapi, + on=['report_date', 'state', 'fuel_type_code_pudl'], + how='left', + ) + frc_df = frc_df.assign( + # add a flag column to note if we are using the api data + fuel_cost_from_eiaapi=lambda x: + np.where( + x.fuel_cost_per_mmbtu.isnull() & x.fuel_cost_per_unit.notnull(), + True, + False + ), + fuel_cost_per_mmbtu=lambda x: + np.where( + x.fuel_cost_per_mmbtu.isnull(), + (x.fuel_cost_per_unit / x.heat_content_mmbtu_per_unit), + x.fuel_cost_per_mmbtu + ) ) # add the flag column to note that we didn't fill in with API data else: @@ -290,10 +298,10 @@ def fuel_receipts_costs_eia923(pudl_engine, freq=None, ) # Calculate a few totals that are commonly needed: - frc_df['total_heat_content_mmbtu'] = \ + frc_df['fuel_consumed_mmbtu'] = \ frc_df['heat_content_mmbtu_per_unit'] * frc_df['fuel_qty_units'] frc_df['total_fuel_cost'] = \ - frc_df['total_heat_content_mmbtu'] * frc_df['fuel_cost_per_mmbtu'] + frc_df['fuel_consumed_mmbtu'] * frc_df['fuel_cost_per_mmbtu'] if freq is not None: by = ['plant_id_eia', 'fuel_type_code_pudl', pd.Grouper(freq=freq)] @@ -314,7 +322,7 @@ def fuel_receipts_costs_eia923(pudl_engine, freq=None, frc_gb = frc_df.groupby(by=by) frc_df = frc_gb.agg({ 'fuel_qty_units': pudl.helpers.sum_na, - 'total_heat_content_mmbtu': pudl.helpers.sum_na, + 'fuel_consumed_mmbtu': pudl.helpers.sum_na, 'total_fuel_cost': pudl.helpers.sum_na, 'total_sulfur_content': pudl.helpers.sum_na, 'total_ash_content': pudl.helpers.sum_na, @@ -324,9 +332,9 @@ def 
fuel_receipts_costs_eia923(pudl_engine, freq=None, 'fuel_cost_from_eiaapi': 'any', }) frc_df['fuel_cost_per_mmbtu'] = \ - frc_df['total_fuel_cost'] / frc_df['total_heat_content_mmbtu'] + frc_df['total_fuel_cost'] / frc_df['fuel_consumed_mmbtu'] frc_df['heat_content_mmbtu_per_unit'] = \ - frc_df['total_heat_content_mmbtu'] / frc_df['fuel_qty_units'] + frc_df['fuel_consumed_mmbtu'] / frc_df['fuel_qty_units'] frc_df['sulfur_content_pct'] = \ frc_df['total_sulfur_content'] / frc_df['fuel_qty_units'] frc_df['ash_content_pct'] = \ @@ -338,19 +346,27 @@ def fuel_receipts_costs_eia923(pudl_engine, freq=None, frc_df['moisture_content_pct'] = \ frc_df['total_moisture_content'] / frc_df['fuel_qty_units'] frc_df = frc_df.reset_index() - frc_df = frc_df.drop(['total_ash_content', - 'total_sulfur_content', - 'total_moisture_content', - 'total_chlorine_content', - 'total_mercury_content'], axis=1) + frc_df = frc_df.drop([ + 'total_ash_content', + 'total_sulfur_content', + 'total_moisture_content', + 'total_chlorine_content', + 'total_mercury_content' + ], axis=1) # Bring in some generic plant & utility information: - pu_eia = pudl.output.eia860.plants_utils_eia860(pudl_engine, - start_date=start_date, - end_date=end_date) + pu_eia = pudl.output.eia860.plants_utils_eia860( + pudl_engine, + start_date=start_date, + end_date=end_date + ) out_df = ( - pudl.helpers.merge_on_date_year(frc_df, pu_eia, on=['plant_id_eia']) + pudl.helpers.clean_merge_asof( + left=frc_df, + right=pu_eia, + by={"plant_id_eia": "eia"} + ) .dropna(subset=['utility_id_eia']) .pipe( pudl.helpers.organize_cols, @@ -364,12 +380,12 @@ def fuel_receipts_costs_eia923(pudl_engine, freq=None, 'utility_name_eia', ] ) - .astype({ - "plant_id_eia": "Int64", - "plant_id_pudl": "Int64", - "utility_id_eia": "Int64", - "utility_id_pudl": "Int64", - }) + .astype(pudl.helpers.get_pudl_dtypes({ + "plant_id_eia": "eia", + "plant_id_pudl": "eia", + "utility_id_eia": "eia", + "utility_id_pudl": "eia", + })) ) if freq is None: @@ 
-395,7 +411,7 @@ def boiler_fuel_eia923(pudl_engine, freq=None, * ``fuel_consumed_units`` (sum) * ``fuel_mmbtu_per_unit`` (weighted average) - * ``total_heat_content_mmbtu`` (sum) + * ``fuel_consumed_mmbtu`` (sum) * ``sulfur_content_pct`` (weighted average) * ``ash_content_pct`` (weighted average) @@ -435,11 +451,11 @@ def boiler_fuel_eia923(pudl_engine, freq=None, # The total heat content is also useful in its own right, and we'll keep it # around. Also needed to calculate average heat content per unit of fuel. - bf_df['total_heat_content_mmbtu'] = bf_df['fuel_consumed_units'] * \ + bf_df['fuel_consumed_mmbtu'] = bf_df['fuel_consumed_units'] * \ bf_df['fuel_mmbtu_per_unit'] # Create a date index for grouping based on freq - by = ['plant_id_eia', 'boiler_id', 'fuel_type_code_pudl'] + by = ['plant_id_eia', 'boiler_id', 'fuel_type_code', 'fuel_type_code_pudl'] if freq is not None: # In order to calculate the weighted average sulfur # content and ash content we need to calculate these totals. @@ -454,13 +470,13 @@ def boiler_fuel_eia923(pudl_engine, freq=None, # Sum up these totals within each group, and recalculate the per-unit # values (weighted in this case by fuel_consumed_units) bf_df = bf_gb.agg({ - 'total_heat_content_mmbtu': pudl.helpers.sum_na, + 'fuel_consumed_mmbtu': pudl.helpers.sum_na, 'fuel_consumed_units': pudl.helpers.sum_na, 'total_sulfur_content': pudl.helpers.sum_na, 'total_ash_content': pudl.helpers.sum_na, }) - bf_df['fuel_mmbtu_per_unit'] = bf_df['total_heat_content_mmbtu'] / \ + bf_df['fuel_mmbtu_per_unit'] = bf_df['fuel_consumed_mmbtu'] / \ bf_df['fuel_consumed_units'] bf_df['sulfur_content_pct'] = bf_df['total_sulfur_content'] / \ bf_df['fuel_consumed_units'] @@ -471,28 +487,58 @@ def boiler_fuel_eia923(pudl_engine, freq=None, axis=1) # Grab some basic plant & utility information to add. 
- pu_eia = pudl.output.eia860.plants_utils_eia860(pudl_engine, - start_date=start_date, - end_date=end_date) + pu_eia = pudl.output.eia860.plants_utils_eia860( + pudl_engine, + start_date=start_date, + end_date=end_date + ) out_df = ( - pudl.helpers.merge_on_date_year(bf_df, pu_eia, on=['plant_id_eia']) + pudl.helpers.clean_merge_asof( + left=bf_df, + right=pu_eia, + by={"plant_id_eia": "eia"}, + ) .dropna(subset=['plant_id_eia', 'utility_id_eia', 'boiler_id']) - .pipe(pudl.helpers.organize_cols, - cols=['report_date', - 'plant_id_eia', - 'plant_id_pudl', - 'plant_name_eia', - 'utility_id_eia', - 'utility_id_pudl', - 'utility_name_eia', - 'boiler_id']) - .astype({ - 'plant_id_eia': "Int64", - 'plant_id_pudl': "Int64", - 'utility_id_eia': "Int64", - 'utility_id_pudl': "Int64", - }) ) + # Merge in the unit_id_pudl assigned to each generator in the BGA process + # Pull the BGA table and make it unit-boiler only: + bga_boilers = ( + pudl.output.eia860.boiler_generator_assn_eia860( + pudl_engine, + start_date=start_date, + end_date=end_date + ) + .loc[:, ["report_date", "plant_id_eia", "boiler_id", "unit_id_pudl"]] + .drop_duplicates() + ) + out_df = pudl.helpers.clean_merge_asof( + left=out_df, + right=bga_boilers, + by={ + "plant_id_eia": "eia", + "boiler_id": "eia", + } + ) + out_df = pudl.helpers.organize_cols( + out_df, + cols=[ + 'report_date', + 'plant_id_eia', + 'plant_id_pudl', + 'plant_name_eia', + 'utility_id_eia', + 'utility_id_pudl', + 'utility_name_eia', + 'boiler_id', + 'unit_id_pudl', + ] + ).astype(pudl.helpers.get_pudl_dtypes({ + 'plant_id_eia': "eia", + 'plant_id_pudl': "eia", + 'unit_id_pudl': "eia", + 'utility_id_eia': "eia", + 'utility_id_pudl': "eia", + })) if freq is None: out_df = out_df.drop(['id'], axis=1) @@ -500,8 +546,12 @@ def boiler_fuel_eia923(pudl_engine, freq=None, return out_df -def generation_eia923(pudl_engine, freq=None, - start_date=None, end_date=None): +def generation_eia923( + pudl_engine, + freq=None, + start_date=None, + 
end_date=None +): """ Pull records from the boiler_fuel_eia923 table in a given data range. @@ -547,15 +597,42 @@ def generation_eia923(pudl_engine, freq=None, {'net_generation_mwh': pudl.helpers.sum_na}).reset_index() # Grab EIA 860 plant and utility specific information: - pu_eia = pudl.output.eia860.plants_utils_eia860(pudl_engine, - start_date=start_date, - end_date=end_date) + pu_eia = pudl.output.eia860.plants_utils_eia860( + pudl_engine, + start_date=start_date, + end_date=end_date + ) # Merge annual plant/utility data in with the more granular dataframe out_df = ( - pudl.helpers.merge_on_date_year(g_df, pu_eia, on=['plant_id_eia']) + pudl.helpers.clean_merge_asof( + left=g_df, + right=pu_eia, + by={"plant_id_eia": "eia"} + ) .dropna(subset=['plant_id_eia', 'utility_id_eia', 'generator_id']) - .pipe(pudl.helpers.organize_cols, cols=[ + ) + # Merge in the unit_id_pudl assigned to each generator in the BGA process + # Pull the BGA table and make it unit-generator only: + bga_gens = ( + pudl.output.eia860.boiler_generator_assn_eia860( + pudl_engine, + start_date=start_date, + end_date=end_date + ) + .loc[:, ["report_date", "plant_id_eia", "generator_id", "unit_id_pudl"]] + .drop_duplicates() + ) + out_df = pudl.helpers.clean_merge_asof( + left=out_df, + right=bga_gens, + by={ + "plant_id_eia": "eia", + "generator_id": "eia", + } + ) + out_df = ( + out_df.pipe(pudl.helpers.organize_cols, cols=[ 'report_date', 'plant_id_eia', 'plant_id_pudl', @@ -565,12 +642,12 @@ def generation_eia923(pudl_engine, freq=None, 'utility_name_eia', 'generator_id', ]) - .astype({ - "plant_id_eia": "Int64", - "plant_id_pudl": "Int64", - "utility_id_eia": "Int64", - "utility_id_pudl": "Int64", - }) + .astype(pudl.helpers.get_pudl_dtypes({ + "plant_id_eia": "eia", + "plant_id_pudl": "eia", + "utility_id_eia": "eia", + "utility_id_pudl": "eia", + })) ) if freq is None: diff --git a/src/pudl/output/ferc714.py b/src/pudl/output/ferc714.py index 28d6e8b7f1..71338ca2af 100644 --- 
a/src/pudl/output/ferc714.py +++ b/src/pudl/output/ferc714.py @@ -174,12 +174,19 @@ def categorize_eia_code(eia_codes, ba_ids, util_ids, priority="balancing_authori ba_ids = ( pd.Series(ba_ids, name="balancing_authority_id_eia") .drop_duplicates() - .astype(pd.Int64Dtype()) + .astype( + pudl.helpers.get_pudl_dtype( + col="balancing_authority_id_eia", + data_source="eia" + ) + ) ) util_ids = ( pd.Series(util_ids, name="utility_id_eia") .drop_duplicates() - .astype(pd.Int64Dtype()) + .astype( + pudl.helpers.get_pudl_dtype(col="utility_id_eia", data_source="eia") + ) ) df = ( diff --git a/src/pudl/output/glue.py b/src/pudl/output/glue.py deleted file mode 100644 index 852389e5b4..0000000000 --- a/src/pudl/output/glue.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Functions that pull glue tables from the PUDL DB for output. - -The glue tables hold information that relates our different datasets to each -other, for example mapping the FERC plants to EIA generators, or the EIA -boilers to EIA generators, or EPA smokestacks to EIA generators. - -""" -import pandas as pd -import sqlalchemy as sa - -import pudl - - -def boiler_generator_assn(pudl_engine, start_date=None, end_date=None): - """Pulls the more complete PUDL/EIA boiler generator associations. - - Args: - pudl_engine (sqlalchemy.engine.Engine): SQLAlchemy connection engine - for the PUDL DB. - start_date (date): Date to begin retrieving data. - end_date (date): Date to end retrieving data. - - Returns: - pandas.DataFrame: A DataFrame containing the more complete PUDL/EIA - boiler generator associations. 
- - """ - pt = pudl.output.pudltabl.get_table_meta(pudl_engine) - bga_eia_tbl = pt['boiler_generator_assn_eia860'] - bga_eia_select = sa.sql.select([bga_eia_tbl]) - - if start_date is not None: - start_date = pd.to_datetime(start_date) - bga_eia_select = bga_eia_select.where( - bga_eia_tbl.c.report_date >= start_date - ) - if end_date is not None: - end_date = pd.to_datetime(end_date) - bga_eia_select = bga_eia_select.where( - bga_eia_tbl.c.report_date <= end_date - ) - out_df = ( - pd.read_sql(bga_eia_select, pudl_engine) - .assign(report_date=lambda x: pd.to_datetime(x.report_date)) - ) - return out_df diff --git a/src/pudl/output/pudltabl.py b/src/pudl/output/pudltabl.py index 1ebd824714..ddefee8333 100644 --- a/src/pudl/output/pudltabl.py +++ b/src/pudl/output/pudltabl.py @@ -71,7 +71,8 @@ def __init__( Args: freq (str): String describing time frequency at which to aggregate - the reported data. E.g. 'MS' (monthly start). + the reported data. Currently this can only be 'MS' + (month start) or 'AS' (annual start). start_date (date): Beginning date for data to pull from the PUDL DB. end_date (date): End date for data to pull from the PUDL DB. @@ -89,23 +90,41 @@ def __init__( ``mcoe()``, ``capacity_factor()`` and ``heat_rate_by_unit()``. """ + if not isinstance(pudl_engine, sa.engine.base.Engine): + raise TypeError( + "PudlTabl needs pudl_engine to be a SQLAlchemy Engine, but we " + f"got a {type(pudl_engine)}." + ) self.pudl_engine = pudl_engine + + if freq not in (None, "AS", "MS"): + raise ValueError( + f"freq must be one of None, 'MS', or 'AS', but we got {freq}." + ) self.freq = freq # We need datastore access because some data is not yet integrated into the # PUDL DB. See the etl_eia861 method. - self.ds = ds - if self.ds is None: + if not ( + (ds is None) or + isinstance(ds, pudl.workspace.datastore.Datastore) + ): + raise TypeError( + "PudlTable needs ds to be a PUDL Datastore object, but we got " + f"a {type(ds)}." 
+ ) + if ds is None: pudl_in = Path(pudl.workspace.setup.get_defaults()["pudl_in"]) self.ds = pudl.workspace.datastore.Datastore( local_cache_path=pudl_in / "data" ) + else: + self.ds = ds # grab all working eia dates to use to set start and end dates if they # are not set eia_dates = pudl.helpers.get_working_eia_dates() if start_date is None: self.start_date = min(eia_dates) - else: # Make sure it's a date... and not a string. self.start_date = pd.to_datetime(start_date) @@ -116,9 +135,6 @@ def __init__( # Make sure it's a date... and not a string. self.end_date = pd.to_datetime(end_date) - if not pudl_engine: - raise AssertionError('PudlTabl object needs a pudl_engine') - self.roll_fuel_cost = roll_fuel_cost self.fill_fuel_cost = fill_fuel_cost self.fill_net_gen = fill_net_gen @@ -495,7 +511,7 @@ def plants_eia860(self, update=False): end_date=self.end_date,) return self._dfs['plants_eia860'] - def gens_eia860(self, update=False): + def gens_eia860(self, update=False, unit_ids=False): """ Pull a dataframe describing generators, as reported in EIA 860. @@ -511,7 +527,9 @@ def gens_eia860(self, update=False): self._dfs['gens_eia860'] = pudl.output.eia860.generators_eia860( self.pudl_engine, start_date=self.start_date, - end_date=self.end_date) + end_date=self.end_date, + unit_ids=unit_ids, + ) return self._dfs['gens_eia860'] def own_eia860(self, update=False): @@ -624,9 +642,9 @@ def gen_eia923(self, update=False): 'to the generator level instead of using the less complete ' 'generation_eia923 table.' 
) - self._dfs['gen_eia923'] = self.gen_allocated_eia923(update) + self._dfs['gen_eia923'] = self.gen_allocated_eia923(update=update) else: - self._dfs['gen_eia923'] = self.gen_original_eia923(update) + self._dfs['gen_eia923'] = self.gen_original_eia923(update=update) return self._dfs['gen_eia923'] def gen_original_eia923(self, update=False): @@ -789,25 +807,6 @@ def plant_in_service_ferc1(self, update=False): ########################################################################### # EIA MCOE OUTPUTS ########################################################################### - def bga(self, update=False): - """ - Pull the more complete EIA/PUDL boiler-generator associations. - - Args: - update (bool): If true, re-calculate the output dataframe, even if - a cached version exists. - - Returns: - pandas.DataFrame: a denormalized table for interactive use. - - """ - if update or self._dfs['bga'] is None: - self._dfs['bga'] = pudl.output.glue.boiler_generator_assn( - self.pudl_engine, - start_date=self.start_date, - end_date=self.end_date) - return self._dfs['bga'] - def hr_by_gen(self, update=False): """ Calculate and return generator level heat rates (mmBTU/MWh). @@ -878,9 +877,15 @@ def capacity_factor(self, update=False, ) return self._dfs['capacity_factor'] - def mcoe(self, update=False, - min_heat_rate=5.5, min_fuel_cost_per_mwh=0.0, - min_cap_fact=0.0, max_cap_fact=1.5): + def mcoe( + self, + update=False, + min_heat_rate=5.5, + min_fuel_cost_per_mwh=0.0, + min_cap_fact=0.0, + max_cap_fact=1.5, + all_gens=True, + ): """ Calculate and return generator level MCOE based on EIA data. @@ -907,6 +912,9 @@ def mcoe(self, update=False, with a lower capacity factor will be filtered out before returning. This allows the user to exclude generators that aren't being used enough to have valid. + all_gens (bool): Controls whether the output contains records for + all generators in the :ref:`generators_eia860` table, or only + those generators with associated MCOE data. 
True by default. Returns: :class:`pandas.DataFrame`: a compilation of generator attributes, @@ -920,6 +928,7 @@ def mcoe(self, update=False, min_fuel_cost_per_mwh=min_fuel_cost_per_mwh, min_cap_fact=min_cap_fact, max_cap_fact=max_cap_fact, + all_gens=all_gens, ) return self._dfs['mcoe'] diff --git a/src/pudl/package_data/meta/datapkg/datapackage.json b/src/pudl/package_data/meta/datapkg/datapackage.json index 7f1b196a86..f64335735a 100644 --- a/src/pudl/package_data/meta/datapkg/datapackage.json +++ b/src/pudl/package_data/meta/datapkg/datapackage.json @@ -17,12 +17,13 @@ "plants_steam_ferc1": "id", "utilities_eia860": "id" }, - "resources": [{ + "resources": [ + { "profile": "tabular-data-resource", "name": "fuel_ferc1", "path": "data/fuel_ferc1.csv", "title": "fuel_ferc1", - "description": "Annual fuel consumed by large thermal generating plants. As reported on page 402 of FERC Form 1.", + "description": "Annual fuel cost and quantity for steam plants with a capacity of 25+ MW, internal combustion and gas-turbine plants of 10+ MW, and all nuclear plants. As reported on page 402 of FERC Form 1 and extracted from the f1_fuel table in FERC's FoxPro Database.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -36,7 +37,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "record_id", "type": "string", "description": "Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.", @@ -133,26 +135,38 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": ["plant_name_ferc1", "utility_id_ferc1"], - "reference": { - "resource": "plants_ferc1", - "fields": ["plant_name_ferc1", "utility_id_ferc1"] + "foreignKeys": [ + { + "fields": [ + "plant_name_ferc1", + "utility_id_ferc1" + ], + "reference": { + "resource": "plants_ferc1", + "fields": [ + "plant_name_ferc1", + "utility_id_ferc1" + ] + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "ferc1", - "path": "ftp://eforms1.ferc.gov/f1allyears" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "ferc1", + "path": "ftp://eforms1.ferc.gov/f1allyears" + } + ] }, { "profile": "tabular-data-resource", @@ -173,7 +187,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "line_id", "type": "string", "description": "A human readable string uniquely identifying the FERC depreciation account. 
Used in lieu of the actual line number, as those numbers are not guaranteed to be consistent from year to year.", @@ -193,11 +208,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -205,6 +222,7 @@ "name": "utilities_eia", "path": "data/utilities_eia.csv", "title": "utilities_eia", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -218,7 +236,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "utility_id_eia", "type": "integer", "format": "default", @@ -240,22 +259,26 @@ "primaryKey": [ "utility_id_eia" ], - "foreignKeys": [{ - "fields": "utility_id_pudl", - "reference": { - "resource": "utilities_pudl", - "fields": "utility_id_pudl" + "foreignKeys": [ + { + "fields": "utility_id_pudl", + "reference": { + "resource": "utilities_pudl", + "fields": "utility_id_pudl" + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -263,6 +286,7 @@ "name": "energy_source_eia923", "path": "data/energy_source_eia923.csv", "title": "energy_source_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -276,7 +300,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "abbr", "type": "string", "format": "default" @@ -294,21 +319,26 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": 
"Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia923", + "path": "https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", "name": "datasets", "path": "data/datasets.csv", "title": "datasets", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -322,7 +352,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "datasource", "type": "string", "description": "Code identifying a dataset available within PUDL.", @@ -350,11 +381,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -362,6 +395,7 @@ "name": "fuel_type_eia923", "path": "data/fuel_type_eia923.csv", "title": "fuel_type_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -375,7 +409,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "abbr", "type": "string", "format": "default" @@ -393,15 +428,19 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": 
"https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia923", + "path": "https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", @@ -422,7 +461,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "utility_id_pudl", "type": "integer", "description": "A manually assigned PUDL utility ID. May not be stable over time.", @@ -442,11 +482,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -454,6 +496,7 @@ "name": "plants_ferc1", "path": "data/plants_ferc1.csv", "title": "FERC 1 Plants", + "description": "Name, utility, and PUDL id for steam plants with a capacity of 25,000+ kW, internal combustion and gas-turbine plants of 10,000+ kW, and all nuclear plants.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -467,7 +510,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "utility_id_ferc1", "type": "integer", "description": "FERC assigned respondent_id, identifying the reporting entity. 
Stable from year to year.", @@ -490,7 +534,8 @@ "utility_id_ferc1", "plant_name_ferc1" ], - "foreignKeys": [{ + "foreignKeys": [ + { "fields": "utility_id_ferc1", "reference": { "resource": "utilities_ferc1", @@ -509,11 +554,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -521,6 +568,7 @@ "name": "generation_eia923", "path": "data/generation_eia923.csv", "title": "generation_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -534,7 +582,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.", @@ -559,32 +608,45 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": ["plant_id_eia", "generator_id"], - "reference": { - "resource": "generators_entity_eia", - "fields": ["plant_id_eia", "generator_id"] + "foreignKeys": [ + { + "fields": [ + "plant_id_eia", + "generator_id" + ], + "reference": { + "resource": "generators_entity_eia", + "fields": [ + "plant_id_eia", + "generator_id" + ] + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia923", + "path": 
"https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", "name": "utilities_entity_eia", "path": "data/utilities_entity_eia.csv", "title": "utilities_entity_eia", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -598,7 +660,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "utility_id_eia", "type": "integer", "description": "The EIA Utility Identification number.", @@ -618,11 +681,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -630,6 +695,7 @@ "name": "generators_entity_eia", "path": "data/generators_entity_eia.csv", "title": "generators_entity_eia", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -643,7 +709,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.", @@ -774,22 +841,26 @@ "plant_id_eia", "generator_id" ], - "foreignKeys": [{ - "fields": "plant_id_eia", - "reference": { - "resource": "plants_entity_eia", - "fields": "plant_id_eia" + "foreignKeys": [ + { + "fields": "plant_id_eia", + "reference": { + "resource": "plants_entity_eia", + "fields": "plant_id_eia" + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": 
"https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -797,7 +868,7 @@ "name": "plants_hydro_ferc1", "path": "data/plants_hydro_ferc1.csv", "title": "plants_hydro_ferc1", - "description": "Hydroelectric generating plant statistics for large plants. Large plants have an installed nameplate capacity of more than 10 MW. As reported on FERC Form 1, pages 406-407, and extracted from the f1_hydro table in FERC's FoxPro database.", + "description": "Generating plant statistics for hydroelectric plants with an installed nameplate capacity of 10 MW. As reported on FERC Form 1, pages 406-407 and extracted from the f1_hydro table in FERC's FoxPro database.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -811,7 +882,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "record_id", "type": "string", "description": "Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.", @@ -1042,26 +1114,38 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": ["utility_id_ferc1", "plant_name_ferc1"], - "reference": { - "resource": "plants_ferc1", - "fields": ["utility_id_ferc1", "plant_name_ferc1"] + "foreignKeys": [ + { + "fields": [ + "utility_id_ferc1", + "plant_name_ferc1" + ], + "reference": { + "resource": "plants_ferc1", + "fields": [ + "utility_id_ferc1", + "plant_name_ferc1" + ] + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "ferc1", - "path": "ftp://eforms1.ferc.gov/f1allyears" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "ferc1", + "path": "ftp://eforms1.ferc.gov/f1allyears" + } + ] }, { "profile": "tabular-data-resource", @@ -1082,7 +1166,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "utility_id_ferc1", "type": "integer", "description": "FERC assigned respondent_id, identifying the reporting entity. 
Stable from year to year.", @@ -1680,26 +1765,32 @@ "report_year", "amount_type" ], - "foreignKeys": [{ - "fields": "utility_id_ferc1", - "reference": { - "resource": "utilities_ferc1", - "fields": "utility_id_ferc1" + "foreignKeys": [ + { + "fields": "utility_id_ferc1", + "reference": { + "resource": "utilities_ferc1", + "fields": "utility_id_ferc1" + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "ferc1", - "path": "ftp://eforms1.ferc.gov/f1allyears" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "ferc1", + "path": "ftp://eforms1.ferc.gov/f1allyears" + } + ] }, { "profile": "tabular-data-resource", @@ -1720,7 +1811,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "record_id", "type": "string", "description": "Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.", @@ -1834,21 +1926,26 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "ferc1", - "path": "ftp://eforms1.ferc.gov/f1allyears" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "ferc1", + "path": "ftp://eforms1.ferc.gov/f1allyears" + } + ] }, { "profile": "tabular-data-resource", "name": "generators_eia860", "path": "data/generators_eia860.csv", "title": "generators_eia860", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -1862,7 +1959,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.", @@ -2251,16 +2349,26 @@ "type": "string", "description": "Source of EIA 860 data. 
Either Annual EIA 860 or the year-to-date updates from EIA 860M.", "constraints": { - "enum": ["eia860","eia860m"] - }, + "enum": [ + "eia860", + "eia860m" + ] + }, "format": "default" } ], - "foreignKeys": [{ - "fields": ["plant_id_eia", "generator_id"], + "foreignKeys": [ + { + "fields": [ + "plant_id_eia", + "generator_id" + ], "reference": { "resource": "generators_entity_eia", - "fields": ["plant_id_eia", "generator_id"] + "fields": [ + "plant_id_eia", + "generator_id" + ] } }, { @@ -2275,26 +2383,30 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia860", - "path": "https://www.eia.gov/electricity/data/eia860" - }, - { - "title": "eia860m", - "path": "https://www.eia.gov/electricity/data/eia860m" - } - ] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia860", + "path": "https://www.eia.gov/electricity/data/eia860" + }, + { + "title": "eia860m", + "path": "https://www.eia.gov/electricity/data/eia860m" + } + ] }, { "profile": "tabular-data-resource", "name": "ownership_eia860", "path": "data/ownership_eia860.csv", "title": "ownership_eia860", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -2308,7 +2420,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "report_date", "type": "date", "description": "Date reported.", @@ -2450,7 +2563,8 @@ "format": "default" } ], - "foreignKeys": [{ + "foreignKeys": [ + { "fields": "utility_id_eia", "reference": { "resource": "utilities_entity_eia", @@ -2458,10 +2572,16 @@ } }, { - "fields": ["plant_id_eia", "generator_id"], + "fields": [ + "plant_id_eia", + "generator_id" + ], "reference": { "resource": "generators_entity_eia", - "fields": ["plant_id_eia", 
"generator_id"] + "fields": [ + "plant_id_eia", + "generator_id" + ] } } ], @@ -2469,15 +2589,19 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia860", - "path": "https://www.eia.gov/electricity/data/eia860" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia860", + "path": "https://www.eia.gov/electricity/data/eia860" + } + ] }, { "profile": "tabular-data-resource", @@ -2498,7 +2622,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_pudl", "type": "integer", "description": "A manually assigned PUDL plant ID. May not be constant over time.", @@ -2518,11 +2643,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -2530,6 +2657,7 @@ "name": "fuel_type_aer_eia923", "path": "data/fuel_type_aer_eia923.csv", "title": "fuel_type_aer_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -2543,7 +2671,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "abbr", "type": "string", "format": "default" @@ -2561,15 +2690,19 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + 
"path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia923", + "path": "https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", @@ -2590,7 +2723,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "utility_id_ferc1", "type": "integer", "description": "FERC-assigned respondent_id, identifying the reporting entity. Stable from year to year.", @@ -2644,7 +2778,8 @@ "report_year", "line_id" ], - "foreignKeys": [{ + "foreignKeys": [ + { "fields": "utility_id_ferc1", "reference": { "resource": "utilities_ferc1", @@ -2663,21 +2798,26 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "ferc1", - "path": "ftp://eforms1.ferc.gov/f1allyears" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "ferc1", + "path": "ftp://eforms1.ferc.gov/f1allyears" + } + ] }, { "profile": "tabular-data-resource", "name": "prime_movers_eia923", "path": "data/prime_movers_eia923.csv", "title": "prime_movers_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -2691,7 +2831,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "abbr", "type": "string", "format": "default" @@ -2709,21 +2850,26 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": 
[ + { + "title": "eia923", + "path": "https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", "name": "fuel_receipts_costs_eia923", "path": "data/fuel_receipts_costs_eia923.csv", "title": "fuel_receipts_costs_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -2737,7 +2883,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "id", "type": "integer", "description": "PUDL issued surrogate key.", @@ -2906,7 +3053,8 @@ "primaryKey": [ "id" ], - "foreignKeys": [{ + "foreignKeys": [ + { "fields": "plant_id_eia", "reference": { "resource": "plants_entity_eia", @@ -2946,15 +3094,19 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia923", + "path": "https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", @@ -2975,7 +3127,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "utility_id_ferc1", "type": "integer", "description": "FERC assigned respondent_id, identifying the reporting entity. 
Stable from year to year.", @@ -2997,22 +3150,26 @@ "primaryKey": [ "utility_id_ferc1" ], - "foreignKeys": [{ - "fields": "utility_id_pudl", - "reference": { - "resource": "utilities_pudl", - "fields": "utility_id_pudl" + "foreignKeys": [ + { + "fields": "utility_id_pudl", + "reference": { + "resource": "utilities_pudl", + "fields": "utility_id_pudl" + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -3020,6 +3177,7 @@ "name": "boiler_generator_assn_eia860", "path": "data/boiler_generator_assn_eia860.csv", "title": "boiler_generator_assn_eia860", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -3033,7 +3191,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.", @@ -3066,7 +3225,7 @@ { "name": "unit_id_pudl", "type": "integer", - "description": "PUDL-assigned unit identification number.", + "description": "Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.", "format": "default" }, { @@ -3076,32 +3235,45 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": ["plant_id_eia", "generator_id"], - "reference": { - "resource": "generators_entity_eia", - "fields": ["plant_id_eia", "generator_id"] + "foreignKeys": [ + { + "fields": [ + "plant_id_eia", + "generator_id" + ], + "reference": { + "resource": "generators_entity_eia", + "fields": [ + "plant_id_eia", + "generator_id" + ] + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia860", - "path": "https://www.eia.gov/electricity/data/eia860" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia860", + "path": "https://www.eia.gov/electricity/data/eia860" + } + ] }, { "profile": "tabular-data-resource", "name": "transport_modes_eia923", "path": "data/transport_modes_eia923.csv", "title": "transport_modes_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -3115,7 +3287,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "abbr", "type": "string", "format": "default" @@ -3133,21 +3306,26 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia923", + "path": 
"https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", "name": "coalmine_eia923", "path": "data/coalmine_eia923.csv", "title": "coalmine_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -3161,7 +3339,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "mine_id_pudl", "type": "integer", "description": "PUDL issued surrogate key.", @@ -3285,21 +3464,26 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia923", + "path": "https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", "name": "plants_eia", "path": "data/plants_eia.csv", "title": "plants_eia", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -3313,7 +3497,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "format": "default", @@ -3333,22 +3518,26 @@ "primaryKey": [ "plant_id_eia" ], - "foreignKeys": [{ - "fields": "plant_id_pudl", - "reference": { - "resource": "plants_pudl", - "fields": "plant_id_pudl" + "foreignKeys": [ + { + "fields": "plant_id_pudl", + "reference": { + "resource": "plants_pudl", + "fields": "plant_id_pudl" + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + 
"path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -3370,7 +3559,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "ferc_account_id", "type": "string", "description": "Account number, from FERC's Uniform System of Accounts for Electric Plant. Also includes higher level labeled categories.", @@ -3390,11 +3580,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -3402,6 +3594,7 @@ "name": "utility_plant_assn", "path": "data/utility_plant_assn.csv", "title": "utility_plant_assn", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -3415,7 +3608,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "utility_id_pudl", "type": "integer", "format": "default" @@ -3430,7 +3624,8 @@ "utility_id_pudl", "plant_id_pudl" ], - "foreignKeys": [{ + "foreignKeys": [ + { "fields": "utility_id_pudl", "reference": { "resource": "utilities_pudl", @@ -3449,11 +3644,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -3461,6 +3658,7 @@ "name": "plants_eia860", "path": "data/plants_eia860.csv", "title": "plants_eia860", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -3474,7 +3672,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": 
"integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.", @@ -3624,32 +3823,39 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": "plant_id_eia", - "reference": { - "resource": "plants_entity_eia", - "fields": "plant_id_eia" + "foreignKeys": [ + { + "fields": "plant_id_eia", + "reference": { + "resource": "plants_entity_eia", + "fields": "plant_id_eia" + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia860", - "path": "https://www.eia.gov/electricity/data/eia860" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia860", + "path": "https://www.eia.gov/electricity/data/eia860" + } + ] }, { "profile": "tabular-data-resource", "name": "generation_fuel_eia923", "path": "data/generation_fuel_eia923.csv", "title": "generation_fuel_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -3663,7 +3869,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.", @@ -3678,7 +3885,7 @@ { "name": "nuclear_unit_id", "type": "integer", - "description": "For nuclear plants only, the unit number .One digit numeric. Nuclear plants are the only type of plants for which data are shown explicitly at the generating unit level.", + "description": "For nuclear plants only. This unit ID appears to correspond directly to the generator ID, as reported in the EIA-860. 
Nuclear plants are the only type of plants for which data are shown explicitly at the generating unit level. Note that nuclear plants only report their fuel consumption and net generation in the generation_fuel_eia923 table and not elsewhere.", "format": "default" }, { @@ -3742,7 +3949,8 @@ "format": "default" } ], - "foreignKeys": [{ + "foreignKeys": [ + { "fields": "plant_id_eia", "reference": { "resource": "plants_entity_eia", @@ -3775,22 +3983,26 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia923", + "path": "https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", "name": "plants_small_ferc1", "path": "data/plants_small_ferc1.csv", "title": "plants_small_ferc1", - "description": "Generating plant statistics for small plants, as reported on FERC Form 1 pages 410-411, and extracted from the FERC FoxPro database table f1_gnrt_plant. Small generating plants are defined by having nameplate capacity of less than 25MW for steam plants, and less than 10MW for internal combustion, conventional hydro, and pumped storage plants.", + "description": "Generating plant statistics for steam plants with less than 25 MW installed nameplate capacity and internal combustion plants, gas-turbine plants, conventional hydro plants, and pumped storage plants with less than 10 MW installed nameplate capacity. 
As reported on FERC Form 1 pages 410-411, and extracted from the FERC FoxPro database table f1_gnrt_plant.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -3804,7 +4016,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "record_id", "type": "string", "description": "Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.", @@ -3913,32 +4126,45 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": ["plant_name_original", "utility_id_ferc1"], - "reference": { - "resource": "plants_ferc1", - "fields": ["plant_name_ferc1", "utility_id_ferc1"] + "foreignKeys": [ + { + "fields": [ + "plant_name_original", + "utility_id_ferc1" + ], + "reference": { + "resource": "plants_ferc1", + "fields": [ + "plant_name_ferc1", + "utility_id_ferc1" + ] + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "ferc1", - "path": "ftp://eforms1.ferc.gov/f1allyears" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "ferc1", + "path": "ftp://eforms1.ferc.gov/f1allyears" + } + ] }, { "profile": "tabular-data-resource", "name": "plants_pumped_storage_ferc1", "path": "data/plants_pumped_storage_ferc1.csv", "title": "plants_pumped_storage_ferc1", + "description": "Generating plant statistics for hydroelectric pumped storage plants with an installed nameplate capacity of 10+ MW. 
As reported on page 408 of FERC Form 1 and extracted from the f1_pumped_storage table in FERC's FoxPro Database.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -3952,7 +4178,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "record_id", "type": "string", "description": "Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.", @@ -4207,32 +4434,45 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": ["plant_name_ferc1", "utility_id_ferc1"], - "reference": { - "resource": "plants_ferc1", - "fields": ["plant_name_ferc1", "utility_id_ferc1"] + "foreignKeys": [ + { + "fields": [ + "plant_name_ferc1", + "utility_id_ferc1" + ], + "reference": { + "resource": "plants_ferc1", + "fields": [ + "plant_name_ferc1", + "utility_id_ferc1" + ] + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "ferc1", - "path": "ftp://eforms1.ferc.gov/f1allyears" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "ferc1", + "path": "ftp://eforms1.ferc.gov/f1allyears" + } + ] }, { "profile": "tabular-data-resource", "name": "boiler_fuel_eia923", "path": "data/boiler_fuel_eia923.csv", "title": "boiler_fuel_eia923", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -4246,7 +4486,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy 
Information Administration.", @@ -4301,32 +4542,39 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": "fuel_type_code", - "reference": { - "resource": "fuel_type_eia923", - "fields": "abbr" + "foreignKeys": [ + { + "fields": "fuel_type_code", + "reference": { + "resource": "fuel_type_eia923", + "fields": "abbr" + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia923", - "path": "https://www.eia.gov/electricity/data/eia923" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia923", + "path": "https://www.eia.gov/electricity/data/eia923" + } + ] }, { "profile": "tabular-data-resource", "name": "hourly_emissions_epacems", "path": "data/hourly_emissions_epacems.csv", "title": "hourly_emissions_epacems", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -4340,7 +4588,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "state", "type": "string", "constraints": { @@ -4559,22 +4808,26 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "epacems", - "path": "ftp://newftp.epa.gov/dmdnload/emissions/hourly/monthly" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "epacems", + "path": "ftp://newftp.epa.gov/dmdnload/emissions/hourly/monthly" + } + ] }, { "profile": "tabular-data-resource", "name": "plants_steam_ferc1", "path": "data/plants_steam_ferc1.csv", "title": "plants_steam_ferc1", - 
"description": "Large thermal generating plants, as reported on page 402 of FERC Form 1.", + "description": "Generating plant statistics for steam plants with a capacity of 25+ MW, internal combustion and gas-turbine plants of 10+ MW, and all nuclear plants. As reported on page 402 of FERC Form 1 and extracted from the f1_gnrt_plant table in FERC's FoxPro Database.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -4588,7 +4841,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "record_id", "type": "string", "description": "Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.", @@ -4851,32 +5105,45 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": ["plant_name_ferc1", "utility_id_ferc1"], - "reference": { - "resource": "plants_ferc1", - "fields": ["plant_name_ferc1", "utility_id_ferc1"] + "foreignKeys": [ + { + "fields": [ + "plant_name_ferc1", + "utility_id_ferc1" + ], + "reference": { + "resource": "plants_ferc1", + "fields": [ + "plant_name_ferc1", + "utility_id_ferc1" + ] + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "ferc1", - "path": "ftp://eforms1.ferc.gov/f1allyears" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "ferc1", + "path": "ftp://eforms1.ferc.gov/f1allyears" + } + ] }, { "profile": "tabular-data-resource", "name": "utilities_eia860", "path": "data/utilities_eia860.csv", "title": "utilities_eia860", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -4890,7 
+5157,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "utility_id_eia", "type": "integer", "description": "EIA-assigned identification number for the company that is responsible for the day-to-day operations of the generator.", @@ -5026,32 +5294,39 @@ "format": "default" } ], - "foreignKeys": [{ - "fields": "utility_id_eia", - "reference": { - "resource": "utilities_entity_eia", - "fields": "utility_id_eia" + "foreignKeys": [ + { + "fields": "utility_id_eia", + "reference": { + "resource": "utilities_entity_eia", + "fields": "utility_id_eia" + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], - "sources": [{ - "title": "eia860", - "path": "https://www.eia.gov/electricity/data/eia860" - }] + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], + "sources": [ + { + "title": "eia860", + "path": "https://www.eia.gov/electricity/data/eia860" + } + ] }, { "profile": "tabular-data-resource", "name": "boilers_entity_eia", "path": "data/boilers_entity_eia.csv", "title": "boilers_entity_eia", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -5065,7 +5340,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.", @@ -5088,22 +5364,26 @@ "plant_id_eia", "boiler_id" ], - "foreignKeys": [{ - "fields": "plant_id_eia", - "reference": { - "resource": "plants_entity_eia", - "fields": "plant_id_eia" + "foreignKeys": [ + { + "fields": "plant_id_eia", + "reference": { + "resource": "plants_entity_eia", + "fields": "plant_id_eia" + } } - }], + 
], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -5111,6 +5391,7 @@ "name": "plants_entity_eia", "path": "data/plants_entity_eia.csv", "title": "plants_entity_eia", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -5124,7 +5405,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.", @@ -5270,11 +5552,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "Creative Commons Attribution 4.0", - "path": "https://creativecommons.org/licenses/by/4.0/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "Creative Commons Attribution 4.0", + "path": "https://creativecommons.org/licenses/by/4.0/" + } + ], "sources": [] }, { @@ -5282,6 +5566,7 @@ "name": "plant_unit_epa", "path": "data/plant_unit_epa.csv", "title": "EPA Plant and Unit", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -5295,7 +5580,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_epa", "type": "integer", "format": "default" @@ -5315,11 +5601,13 @@ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "EPA", - "path": "https://www.epa.gov/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "EPA", + "path": "https://www.epa.gov/" + } + ], "sources": [] }, { @@ -5327,6 +5615,7 @@ "name": "assn_plant_id_eia_epa", "path": "data/assn_plant_id_eia_epa.csv", "title": "EIA-EPA Plant ID Crosswalk", + 
"description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -5340,7 +5629,8 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_epa", "type": "integer", "format": "default" @@ -5356,22 +5646,30 @@ "plant_id_epa", "plant_id_eia" ], - "foreignKeys": [{ - "fields": ["plant_id_eia"], - "reference": { - "resource": "plants_entity_eia", - "fields": ["plant_id_eia"] + "foreignKeys": [ + { + "fields": [ + "plant_id_eia" + ], + "reference": { + "resource": "plants_entity_eia", + "fields": [ + "plant_id_eia" + ] + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "EPA", - "path": "https://www.epa.gov/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "EPA", + "path": "https://www.epa.gov/" + } + ], "sources": [] }, { @@ -5379,6 +5677,7 @@ "name": "assn_gen_eia_unit_epa", "path": "data/assn_gen_eia_unit_epa.csv", "title": "EIA Plant and Generator to EPA Unit", + "description": "Pending description.", "encoding": "utf-8", "mediatype": "text/csv", "format": "csv", @@ -5392,23 +5691,24 @@ "caseSensitiveHeader": false }, "schema": { - "fields": [{ + "fields": [ + { "name": "plant_id_eia", "type": "integer", "description": "The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.", "format": "default" }, { - "name": "unit_id_epa", - "type": "string", - "description": "Smokestack unit monitored by EPA CEMS.", - "format": "default" + "name": "unit_id_epa", + "type": "string", + "description": "Smokestack unit monitored by EPA CEMS.", + "format": "default" }, { - "name": "generator_id", - "type": "string", - "description": "Generator identification code. Often numeric, but sometimes includes letters. It's a string!", - "format": "default" + "name": "generator_id", + "type": "string", + "description": "Generator identification code. 
Often numeric, but sometimes includes letters. It's a string!", + "format": "default" } ], "primaryKey": [ @@ -5416,22 +5716,32 @@ "unit_id_epa", "generator_id" ], - "foreignKeys": [{ - "fields": ["plant_id_eia", "generator_id"], - "reference": { - "resource": "generators_entity_eia", - "fields": ["plant_id_eia", "generator_id"] + "foreignKeys": [ + { + "fields": [ + "plant_id_eia", + "generator_id" + ], + "reference": { + "resource": "generators_entity_eia", + "fields": [ + "plant_id_eia", + "generator_id" + ] + } } - }], + ], "missingValues": [ "" ] }, - "licenses": [{ - "name": "CC-BY-4.0", - "title": "EPA", - "path": "https://www.epa.gov/" - }], + "licenses": [ + { + "name": "CC-BY-4.0", + "title": "EPA", + "path": "https://www.epa.gov/" + } + ], "sources": [] } ] diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/boiler_fuel.csv b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/boiler_fuel.csv index 86f87a7b2f..6197aa8d4d 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/boiler_fuel.csv +++ b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/boiler_fuel.csv @@ -1,66 +1,66 @@ -year_index,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id -combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant -plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name 
-operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id -plant_state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state -census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region -nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region -naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code -eia_sector,eia_sector_number,eia_sector_number,eia_sector_number,sector_number,eia_sector_number,sector_number,sector_number,sector_number,sector_number,sector_number,sector_number -sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name -boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id -prime_mover_code,prime_mover_type,prime_mover_type,prime_mover_type,reported_prime_mover,prime_mover_type,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover -fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code -fuel_unit,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label 
-fuel_consumed_units_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january -fuel_consumed_units_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february -fuel_consumed_units_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march -fuel_consumed_units_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april -fuel_consumed_units_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may 
-fuel_consumed_units_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june -fuel_consumed_units_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july -fuel_consumed_units_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august -fuel_consumed_units_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september -fuel_consumed_units_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october 
-fuel_consumed_units_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november -fuel_consumed_units_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december -fuel_mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january -fuel_mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february -fuel_mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march -fuel_mmbtu_per_unit_april,mmbtu_per_unit_apirl,mmbtu_per_unit_apirl,mmbtu_per_unit_apirl,mmbtu_per_unit_april,mmbtu_per_unit_apirl,mmbtu_per_unit_april,mmbtu_per_unit_april,mmbtu_per_unit_april,mmbtu_per_unit_april,mmbtu_per_unit_april,mmbtu_per_unit_april 
-fuel_mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may -fuel_mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june -fuel_mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july -fuel_mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august -fuel_mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september -fuel_mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october -fuel_mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november 
-fuel_mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december -sulfur_content_pct_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january -sulfur_content_pct_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february -sulfur_content_pct_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march -sulfur_content_pct_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april -sulfur_content_pct_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may -sulfur_content_pct_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june 
-sulfur_content_pct_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july -sulfur_content_pct_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august -sulfur_content_pct_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september -sulfur_content_pct_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october -sulfur_content_pct_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november -sulfur_content_pct_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december -ash_content_pct_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january 
-ash_content_pct_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february -ash_content_pct_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march -ash_content_pct_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april -ash_content_pct_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may -ash_content_pct_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june -ash_content_pct_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july -ash_content_pct_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august -ash_content_pct_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september 
-ash_content_pct_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october -ash_content_pct_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november -ash_content_pct_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december -total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity -report_year,year,year,year,year,year,year,year,year,year,year,year +year_index,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id +combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant +plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name 
+operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +plant_state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state +census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region +nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region +naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code +eia_sector,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,sector_number,eia_sector_number,sector_number,sector_number,sector_number,sector_number,sector_number,sector_number +sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name +boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id,boiler_id +prime_mover_code,prime_mover_type,prime_mover_type,prime_mover_type,prime_mover_type,reported_prime_mover,prime_mover_type,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover +fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code 
+fuel_unit,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label +fuel_consumed_units_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january,quantity_of_fuel_consumed_january +fuel_consumed_units_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february,quantity_of_fuel_consumed_february +fuel_consumed_units_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march,quantity_of_fuel_consumed_march +fuel_consumed_units_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april,quantity_of_fuel_consumed_april 
+fuel_consumed_units_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may,quantity_of_fuel_consumed_may +fuel_consumed_units_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june,quantity_of_fuel_consumed_june +fuel_consumed_units_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july,quantity_of_fuel_consumed_july +fuel_consumed_units_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august,quantity_of_fuel_consumed_august 
+fuel_consumed_units_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september,quantity_of_fuel_consumed_september +fuel_consumed_units_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october,quantity_of_fuel_consumed_october +fuel_consumed_units_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november,quantity_of_fuel_consumed_november +fuel_consumed_units_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december,quantity_of_fuel_consumed_december 
+fuel_mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january,mmbtu_per_unit_january +fuel_mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february,mmbtu_per_unit_february +fuel_mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march,mmbtu_per_unit_march +fuel_mmbtu_per_unit_april,mmbtu_per_unit_apirl,mmbtu_per_unit_apirl,mmbtu_per_unit_apirl,mmbtu_per_unit_apirl,mmbtu_per_unit_april,mmbtu_per_unit_apirl,mmbtu_per_unit_april,mmbtu_per_unit_april,mmbtu_per_unit_april,mmbtu_per_unit_april,mmbtu_per_unit_april,mmbtu_per_unit_april +fuel_mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may +fuel_mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june,mmbtu_per_unit_june +fuel_mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july,mmbtu_per_unit_july 
+fuel_mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august,mmbtu_per_unit_august +fuel_mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september,mmbtu_per_unit_september +fuel_mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october,mmbtu_per_unit_october +fuel_mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november,mmbtu_per_unit_november +fuel_mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december,mmbtu_per_unit_december +sulfur_content_pct_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january,sulfur_content_january 
+sulfur_content_pct_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february,sulfur_content_february +sulfur_content_pct_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march,sulfur_content_march +sulfur_content_pct_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april,sulfur_content_april +sulfur_content_pct_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may,sulfur_content_may +sulfur_content_pct_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june,sulfur_content_june +sulfur_content_pct_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july,sulfur_content_july +sulfur_content_pct_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august,sulfur_content_august 
+sulfur_content_pct_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september,sulfur_content_september +sulfur_content_pct_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october,sulfur_content_october +sulfur_content_pct_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november,sulfur_content_november +sulfur_content_pct_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december,sulfur_content_december +ash_content_pct_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january,ash_content_january +ash_content_pct_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february,ash_content_february 
+ash_content_pct_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march,ash_content_march +ash_content_pct_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april,ash_content_april +ash_content_pct_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may,ash_content_may +ash_content_pct_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june,ash_content_june +ash_content_pct_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july,ash_content_july +ash_content_pct_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august,ash_content_august +ash_content_pct_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september,ash_content_september +ash_content_pct_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october,ash_content_october 
+ash_content_pct_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november,ash_content_november +ash_content_pct_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december,ash_content_december +total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity +report_year,year,year,year,year,year,year,year,year,year,year,year,year diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/fuel_receipts_costs.csv b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/fuel_receipts_costs.csv index aa63a18189..9aedffc7c2 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/fuel_receipts_costs.csv +++ b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/fuel_receipts_costs.csv @@ -1,32 +1,32 @@ -year_index,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -report_year,year,year,year,year,year,year,year,year,year,year,year -report_month,month,month,month,month,month,month,month,month,month,month,month -plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id -plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name 
-plant_state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state -contract_type_code,contract_type,contract_type,contract_type,purchase_type,contract_type,purchase_type,purchase_type,purchase_type,purchase_type,purchase_type,purchase_type -contract_expiration_date,contract_exp_date,contract_exp_date,contract_exp_date,contract_expiration_date,contract_exp_date,contract_expiration_date,contract_expiration_date,contract_expiration_date,contract_expiration_date,contract_expiration_date,contract_expiration_date -energy_source_code,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source -fuel_group_code,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group -mine_type_code,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type -state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state -county_id_fips,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county -mine_id_msha,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id -mine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name -supplier_name,supplier,supplier,supplier,supplier,supplier,supplier,supplier,supplier,supplier,supplier,supplier 
-fuel_qty_units,quantity,quantity,quantity,quantity,quantity,quantity,quantity,quantity,quantity,quantity,quantity -heat_content_mmbtu_per_unit,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content -sulfur_content_pct,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content -ash_content_pct,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content -mercury_content_ppm,,,,average_mercury_content,,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content -fuel_cost_per_mmbtu,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost -regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id -reporting_frequency,respondent_frequency,respondent_frequency,respondent_frequency,reporting_frequency,respondent_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency 
-primary_transportation_mode_code,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode -secondary_transportation_mode_code,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode -natural_gas_transport_code,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_supply_contract_type,natural_gas_supply_contract_type,natural_gas_supply_contract_type,natural_gas_supply_contract_type -natural_gas_delivery_contract_type_code,,,,,,,,natural_gas_delivery_contract_type,natural_gas_delivery_contract_type,natural_gas_delivery_contract_type,natural_gas_delivery_contract_type -moisture_content_pct,,,,,,,,moisture_content,moisture_content,moisture_content,moisture_content -chlorine_content_ppm,,,,,,,,chlorine_content,chlorine_content,chlorine_content,chlorine_content +year_index,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +report_year,year,year,year,year,year,year,year,year,year,year,year,year +report_month,month,month,month,month,month,month,month,month,month,month,month,month +plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id +plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name 
+plant_state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state +contract_type_code,contract_type,contract_type,contract_type,contract_type,purchase_type,contract_type,purchase_type,purchase_type,purchase_type,purchase_type,purchase_type,purchase_type +contract_expiration_date,contract_exp_date,contract_exp_date,contract_exp_date,contract_exp_date,contract_expiration_date,contract_exp_date,contract_expiration_date,contract_expiration_date,contract_expiration_date,contract_expiration_date,contract_expiration_date,contract_expiration_date +energy_source_code,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source,energy_source +fuel_group_code,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group,fuel_group +mine_type_code,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type,coalmine_type +state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state,coalmine_state +county_id_fips,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county,coalmine_county +mine_id_msha,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id,coalmine_msha_id +mine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name,coalmine_name 
+supplier_name,supplier,supplier,supplier,supplier,supplier,supplier,supplier,supplier,supplier,supplier,supplier,supplier +fuel_qty_units,quantity,quantity,quantity,quantity,quantity,quantity,quantity,quantity,quantity,quantity,quantity,quantity +heat_content_mmbtu_per_unit,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content,average_heat_content +sulfur_content_pct,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content,average_sulfur_content +ash_content_pct,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content,average_ash_content +mercury_content_ppm,,,,,average_mercury_content,,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content +fuel_cost_per_mmbtu,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost +regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated +operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id 
+reporting_frequency,respondent_frequency,respondent_frequency,respondent_frequency,respondent_frequency,reporting_frequency,respondent_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency +primary_transportation_mode_code,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode +secondary_transportation_mode_code,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode +natural_gas_transport_code,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_transportation_service,natural_gas_supply_contract_type,natural_gas_supply_contract_type,natural_gas_supply_contract_type,natural_gas_supply_contract_type +natural_gas_delivery_contract_type_code,,,,,,,,,natural_gas_delivery_contract_type,natural_gas_delivery_contract_type,natural_gas_delivery_contract_type,natural_gas_delivery_contract_type +moisture_content_pct,,,,,,,,,moisture_content,moisture_content,moisture_content,moisture_content +chlorine_content_ppm,,,,,,,,,chlorine_content,chlorine_content,chlorine_content,chlorine_content diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/generation_fuel.csv 
b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/generation_fuel.csv index 56ed061620..69540f8767 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/generation_fuel.csv +++ b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/generation_fuel.csv @@ -1,98 +1,98 @@ -year_index,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id -combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant -nuclear_unit_id,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id -plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id -plant_state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state -census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region -nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region 
-reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved -naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code -eia_sector,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number -sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name -prime_mover_code,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover -fuel_type,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code -fuel_type_code_aer,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code -reserved_1,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved -reserved_2,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved -fuel_unit,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label 
-fuel_consumed_units_january,quantity_jan,quantity_jan,quantity_jan,quantity_january,quantity_jan,quantity_january,quantity_january,quantity_january,quantity_january,quantity_january,quantity_january -fuel_consumed_units_february,quantity_feb,quantity_feb,quantity_feb,quantity_february,quantity_feb,quantity_february,quantity_february,quantity_february,quantity_february,quantity_february,quantity_february -fuel_consumed_units_march,quantity_mar,quantity_mar,quantity_mar,quantity_march,quantity_mar,quantity_march,quantity_march,quantity_march,quantity_march,quantity_march,quantity_march -fuel_consumed_units_april,quantity_apr,quantity_apr,quantity_apr,quantity_april,quantity_apr,quantity_april,quantity_april,quantity_april,quantity_april,quantity_april,quantity_april -fuel_consumed_units_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may -fuel_consumed_units_june,quantity_jun,quantity_jun,quantity_jun,quantity_june,quantity_jun,quantity_june,quantity_june,quantity_june,quantity_june,quantity_june,quantity_june -fuel_consumed_units_july,quantity_jul,quantity_jul,quantity_jul,quantity_july,quantity_jul,quantity_july,quantity_july,quantity_july,quantity_july,quantity_july,quantity_july -fuel_consumed_units_august,quantity_aug,quantity_aug,quantity_aug,quantity_august,quantity_aug,quantity_august,quantity_august,quantity_august,quantity_august,quantity_august,quantity_august -fuel_consumed_units_september,quantity_sep,quantity_sep,quantity_sep,quantity_september,quantity_sep,quantity_september,quantity_september,quantity_september,quantity_september,quantity_september,quantity_september -fuel_consumed_units_october,quantity_oct,quantity_oct,quantity_oct,quantity_october,quantity_oct,quantity_october,quantity_october,quantity_october,quantity_october,quantity_october,quantity_october 
-fuel_consumed_units_november,quantity_nov,quantity_nov,quantity_nov,quantity_november,quantity_nov,quantity_november,quantity_november,quantity_november,quantity_november,quantity_november,quantity_november -fuel_consumed_units_december,quantity_dec,quantity_dec,quantity_dec,quantity_december,quantity_dec,quantity_december,quantity_december,quantity_december,quantity_december,quantity_december,quantity_december -fuel_consumed_for_electricity_units_january,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_january,elec_quantity_jan,elec_quantity_january,elec_quantity_january,elec_quantity_january,elec_quantity_january,elec_quantity_january,elec_quantity_january -fuel_consumed_for_electricity_units_february,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_february,elec_quantity_feb,elec_quantity_february,elec_quantity_february,elec_quantity_february,elec_quantity_february,elec_quantity_february,elec_quantity_february -fuel_consumed_for_electricity_units_march,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_march,elec_quantity_mar,elec_quantity_march,elec_quantity_march,elec_quantity_march,elec_quantity_march,elec_quantity_march,elec_quantity_march -fuel_consumed_for_electricity_units_april,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_april,elec_quantity_apr,elec_quantity_april,elec_quantity_april,elec_quantity_april,elec_quantity_april,elec_quantity_april,elec_quantity_april -fuel_consumed_for_electricity_units_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may -fuel_consumed_for_electricity_units_june,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_june,elec_quantity_jun,elec_quantity_june,elec_quantity_june,elec_quantity_june,elec_quantity_june,elec_quantity_june,elec_quantity_june 
-fuel_consumed_for_electricity_units_july,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_july,elec_quantity_jul,elec_quantity_july,elec_quantity_july,elec_quantity_july,elec_quantity_july,elec_quantity_july,elec_quantity_july -fuel_consumed_for_electricity_units_august,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_august,elec_quantity_aug,elec_quantity_august,elec_quantity_august,elec_quantity_august,elec_quantity_august,elec_quantity_august,elec_quantity_august -fuel_consumed_for_electricity_units_september,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_september,elec_quantity_sep,elec_quantity_september,elec_quantity_september,elec_quantity_september,elec_quantity_september,elec_quantity_september,elec_quantity_september -fuel_consumed_for_electricity_units_october,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_october,elec_quantity_oct,elec_quantity_october,elec_quantity_october,elec_quantity_october,elec_quantity_october,elec_quantity_october,elec_quantity_october -fuel_consumed_for_electricity_units_november,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_november,elec_quantity_nov,elec_quantity_november,elec_quantity_november,elec_quantity_november,elec_quantity_november,elec_quantity_november,elec_quantity_november -fuel_consumed_for_electricity_units_december,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_december,elec_quantity_dec,elec_quantity_december,elec_quantity_december,elec_quantity_december,elec_quantity_december,elec_quantity_december,elec_quantity_december -fuel_mmbtu_per_unit_january,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtuper_unit_jan,mmbtuper_unit_january,mmbtuper_unit_jan,mmbtuper_unit_january,mmbtuper_unit_january,mmbtuper_unit_january,mmbtuper_unit_january,mmbtuper_unit_january,mmbtuper_unit_january 
-fuel_mmbtu_per_unit_february,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtuper_unit_feb,mmbtuper_unit_february,mmbtuper_unit_feb,mmbtuper_unit_february,mmbtuper_unit_february,mmbtuper_unit_february,mmbtuper_unit_february,mmbtuper_unit_february,mmbtuper_unit_february -fuel_mmbtu_per_unit_march,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtuper_unit_mar,mmbtuper_unit_march,mmbtuper_unit_mar,mmbtuper_unit_march,mmbtuper_unit_march,mmbtuper_unit_march,mmbtuper_unit_march,mmbtuper_unit_march,mmbtuper_unit_march -fuel_mmbtu_per_unit_april,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtuper_unit_apr,mmbtuper_unit_april,mmbtuper_unit_apr,mmbtuper_unit_april,mmbtuper_unit_april,mmbtuper_unit_april,mmbtuper_unit_april,mmbtuper_unit_april,mmbtuper_unit_april -fuel_mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may -fuel_mmbtu_per_unit_june,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtuper_unit_jun,mmbtuper_unit_june,mmbtuper_unit_jun,mmbtuper_unit_june,mmbtuper_unit_june,mmbtuper_unit_june,mmbtuper_unit_june,mmbtuper_unit_june,mmbtuper_unit_june -fuel_mmbtu_per_unit_july,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtuper_unit_jul,mmbtuper_unit_july,mmbtuper_unit_jul,mmbtuper_unit_july,mmbtuper_unit_july,mmbtuper_unit_july,mmbtuper_unit_july,mmbtuper_unit_july,mmbtuper_unit_july -fuel_mmbtu_per_unit_august,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtuper_unit_aug,mmbtuper_unit_august,mmbtuper_unit_aug,mmbtuper_unit_august,mmbtuper_unit_august,mmbtuper_unit_august,mmbtuper_unit_august,mmbtuper_unit_august,mmbtuper_unit_august -fuel_mmbtu_per_unit_september,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtuper_unit_sep,mmbtuper_unit_september,mmbtuper_unit_sep,mmbtuper_unit_september,mmbtuper_unit_september,mmbtuper_unit_september,mmbtuper_unit_september,mmbtuper_unit_september,mmbtuper_unit_september 
-fuel_mmbtu_per_unit_october,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtuper_unit_oct,mmbtuper_unit_october,mmbtuper_unit_oct,mmbtuper_unit_october,mmbtuper_unit_october,mmbtuper_unit_october,mmbtuper_unit_october,mmbtuper_unit_october,mmbtuper_unit_october -fuel_mmbtu_per_unit_november,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtuper_unit_nov,mmbtuper_unit_november,mmbtuper_unit_nov,mmbtuper_unit_november,mmbtuper_unit_november,mmbtuper_unit_november,mmbtuper_unit_november,mmbtuper_unit_november,mmbtuper_unit_november -fuel_mmbtu_per_unit_december,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtuper_unit_dec,mmbtuper_unit_december,mmbtuper_unit_dec,mmbtuper_unit_december,mmbtuper_unit_december,mmbtuper_unit_december,mmbtuper_unit_december,mmbtuper_unit_december,mmbtuper_unit_december -fuel_consumed_mmbtu_january,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtujan,tot_mmbtu_january,tot_mmbtujan,tot_mmbtu_january,tot_mmbtu_january,tot_mmbtu_january,tot_mmbtu_january,tot_mmbtu_january,tot_mmbtu_january -fuel_consumed_mmbtu_february,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtufeb,tot_mmbtu_february,tot_mmbtufeb,tot_mmbtu_february,tot_mmbtu_february,tot_mmbtu_february,tot_mmbtu_february,tot_mmbtu_february,tot_mmbtu_february -fuel_consumed_mmbtu_march,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtumar,tot_mmbtu_march,tot_mmbtumar,tot_mmbtu_march,tot_mmbtu_march,tot_mmbtu_march,tot_mmbtu_march,tot_mmbtu_march,tot_mmbtu_march -fuel_consumed_mmbtu_april,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtuapr,tot_mmbtu_april,tot_mmbtuapr,tot_mmbtu_april,tot_mmbtu_april,tot_mmbtu_april,tot_mmbtu_april,tot_mmbtu_april,tot_mmbtu_april -fuel_consumed_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtumay,tot_mmbtu_may,tot_mmbtumay,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may -fuel_consumed_mmbtu_june,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtujun,tot_mmbtu_june,tot_mmbtujun,tot_mmbtu_june,tot_mmbtu_june,tot_mmbtu_june,tot_mmbtu_june,tot_mmbtu_june,tot_mmbtu_june 
-fuel_consumed_mmbtu_july,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtujul,tot_mmbtu_july,tot_mmbtujul,tot_mmbtu_july,tot_mmbtu_july,tot_mmbtu_july,tot_mmbtu_july,tot_mmbtu_july,tot_mmbtu_july -fuel_consumed_mmbtu_august,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtuaug,tot_mmbtu_august,tot_mmbtuaug,tot_mmbtu_august,tot_mmbtu_august,tot_mmbtu_august,tot_mmbtu_august,tot_mmbtu_august,tot_mmbtu_august -fuel_consumed_mmbtu_september,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtusep,tot_mmbtu_september,tot_mmbtusep,tot_mmbtu_september,tot_mmbtu_september,tot_mmbtu_september,tot_mmbtu_september,tot_mmbtu_september,tot_mmbtu_september -fuel_consumed_mmbtu_october,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtuoct,tot_mmbtu_october,tot_mmbtuoct,tot_mmbtu_october,tot_mmbtu_october,tot_mmbtu_october,tot_mmbtu_october,tot_mmbtu_october,tot_mmbtu_october -fuel_consumed_mmbtu_november,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtunov,tot_mmbtu_november,tot_mmbtunov,tot_mmbtu_november,tot_mmbtu_november,tot_mmbtu_november,tot_mmbtu_november,tot_mmbtu_november,tot_mmbtu_november -fuel_consumed_mmbtu_december,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtudec,tot_mmbtu_december,tot_mmbtudec,tot_mmbtu_december,tot_mmbtu_december,tot_mmbtu_december,tot_mmbtu_december,tot_mmbtu_december,tot_mmbtu_december -fuel_consumed_for_electricity_mmbtu_january,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtujan,elec_mmbtu_january,elec_mmbtujan,elec_mmbtu_january,elec_mmbtu_january,elec_mmbtu_january,elec_mmbtu_january,elec_mmbtu_january,elec_mmbtu_january -fuel_consumed_for_electricity_mmbtu_february,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtufeb,elec_mmbtu_february,elec_mmbtufeb,elec_mmbtu_february,elec_mmbtu_february,elec_mmbtu_february,elec_mmbtu_february,elec_mmbtu_february,elec_mmbtu_february -fuel_consumed_for_electricity_mmbtu_march,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtumar,elec_mmbtu_march,elec_mmbtumar,elec_mmbtu_march,elec_mmbtu_march,elec_mmbtu_march,elec_mmbtu_march,elec_mmbtu_march,elec_mmbtu_march 
-fuel_consumed_for_electricity_mmbtu_april,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtuapr,elec_mmbtu_april,elec_mmbtuapr,elec_mmbtu_april,elec_mmbtu_april,elec_mmbtu_april,elec_mmbtu_april,elec_mmbtu_april,elec_mmbtu_april -fuel_consumed_for_electricity_mmbtu_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtumay,elec_mmbtu_may,elec_mmbtumay,elec_mmbtu_may,elec_mmbtu_may,elec_mmbtu_may,elec_mmbtu_may,elec_mmbtu_may,elec_mmbtu_may -fuel_consumed_for_electricity_mmbtu_june,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtujun,elec_mmbtu_june,elec_mmbtujun,elec_mmbtu_june,elec_mmbtu_june,elec_mmbtu_june,elec_mmbtu_june,elec_mmbtu_june,elec_mmbtu_june -fuel_consumed_for_electricity_mmbtu_july,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtujul,elec_mmbtu_july,elec_mmbtujul,elec_mmbtu_july,elec_mmbtu_july,elec_mmbtu_july,elec_mmbtu_july,elec_mmbtu_july,elec_mmbtu_july -fuel_consumed_for_electricity_mmbtu_august,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtuaug,elec_mmbtu_august,elec_mmbtuaug,elec_mmbtu_august,elec_mmbtu_august,elec_mmbtu_august,elec_mmbtu_august,elec_mmbtu_august,elec_mmbtu_august -fuel_consumed_for_electricity_mmbtu_september,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtusep,elec_mmbtu_september,elec_mmbtusep,elec_mmbtu_september,elec_mmbtu_september,elec_mmbtu_september,elec_mmbtu_september,elec_mmbtu_september,elec_mmbtu_september -fuel_consumed_for_electricity_mmbtu_october,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtuoct,elec_mmbtu_october,elec_mmbtuoct,elec_mmbtu_october,elec_mmbtu_october,elec_mmbtu_october,elec_mmbtu_october,elec_mmbtu_october,elec_mmbtu_october -fuel_consumed_for_electricity_mmbtu_november,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtunov,elec_mmbtu_november,elec_mmbtunov,elec_mmbtu_november,elec_mmbtu_november,elec_mmbtu_november,elec_mmbtu_november,elec_mmbtu_november,elec_mmbtu_november 
-fuel_consumed_for_electricity_mmbtu_december,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtudec,elec_mmbtu_december,elec_mmbtudec,elec_mmbtu_december,elec_mmbtu_december,elec_mmbtu_december,elec_mmbtu_december,elec_mmbtu_december,elec_mmbtu_december -net_generation_mwh_january,netgen_jan,netgen_jan,netgen_jan,netgen_january,netgen_jan,netgen_january,netgen_january,netgen_january,netgen_january,netgen_january,netgen_january -net_generation_mwh_february,netgen_feb,netgen_feb,netgen_feb,netgen_february,netgen_feb,netgen_february,netgen_february,netgen_february,netgen_february,netgen_february,netgen_february -net_generation_mwh_march,netgen_mar,netgen_mar,netgen_mar,netgen_march,netgen_mar,netgen_march,netgen_march,netgen_march,netgen_march,netgen_march,netgen_march -net_generation_mwh_april,netgen_apr,netgen_apr,netgen_apr,netgen_april,netgen_apr,netgen_april,netgen_april,netgen_april,netgen_april,netgen_april,netgen_april -net_generation_mwh_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may -net_generation_mwh_june,netgen_jun,netgen_jun,netgen_jun,netgen_june,netgen_jun,netgen_june,netgen_june,netgen_june,netgen_june,netgen_june,netgen_june -net_generation_mwh_july,netgen_jul,netgen_jul,netgen_jul,netgen_july,netgen_jul,netgen_july,netgen_july,netgen_july,netgen_july,netgen_july,netgen_july -net_generation_mwh_august,netgen_aug,netgen_aug,netgen_aug,netgen_august,netgen_aug,netgen_august,netgen_august,netgen_august,netgen_august,netgen_august,netgen_august -net_generation_mwh_september,netgen_sep,netgen_sep,netgen_sep,netgen_september,netgen_sep,netgen_september,netgen_september,netgen_september,netgen_september,netgen_september,netgen_september -net_generation_mwh_october,netgen_oct,netgen_oct,netgen_oct,netgen_october,netgen_oct,netgen_october,netgen_october,netgen_october,netgen_october,netgen_october,netgen_october 
-net_generation_mwh_november,netgen_nov,netgen_nov,netgen_nov,netgen_november,netgen_nov,netgen_november,netgen_november,netgen_november,netgen_november,netgen_november,netgen_november -net_generation_mwh_december,netgen_dec,netgen_dec,netgen_dec,netgen_december,netgen_dec,netgen_december,netgen_december,netgen_december,netgen_december,netgen_december,netgen_december -total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity -electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity -total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu -elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu 
-net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours -report_year,year,year,year,year,year,year,year,year,year,year,year +year_index,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id +combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant +nuclear_unit_id,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id +plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name 
+operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +plant_state,state,state,state,state,state,state,state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state +census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region +nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region +reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved +naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code +eia_sector,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number 
+sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name +prime_mover_code,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover +fuel_type,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code,reported_fuel_type_code +fuel_type_code_aer,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code,aer_fuel_type_code +reserved_1,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved +reserved_2,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved,reserved 
+fuel_unit,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label,physical_unit_label +fuel_consumed_units_january,quantity_jan,quantity_jan,quantity_jan,quantity_jan,quantity_jan,quantity_jan,quantity_jan,quantity_jan,quantity_jan,quantity_jan,quantity_jan,quantity_january,quantity_jan,quantity_january,quantity_january,quantity_january,quantity_january,quantity_january,quantity_january +fuel_consumed_units_february,quantity_feb,quantity_feb,quantity_feb,quantity_feb,quantity_feb,quantity_feb,quantity_feb,quantity_feb,quantity_feb,quantity_feb,quantity_feb,quantity_february,quantity_feb,quantity_february,quantity_february,quantity_february,quantity_february,quantity_february,quantity_february +fuel_consumed_units_march,quantity_mar,quantity_mar,quantity_mar,quantity_mar,quantity_mar,quantity_mar,quantity_mar,quantity_mar,quantity_mar,quantity_mar,quantity_mar,quantity_march,quantity_mar,quantity_march,quantity_march,quantity_march,quantity_march,quantity_march,quantity_march +fuel_consumed_units_april,quantity_apr,quantity_apr,quantity_apr,quantity_apr,quantity_apr,quantity_apr,quantity_apr,quantity_apr,quantity_apr,quantity_apr,quantity_apr,quantity_april,quantity_apr,quantity_april,quantity_april,quantity_april,quantity_april,quantity_april,quantity_april +fuel_consumed_units_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may,quantity_may 
+fuel_consumed_units_june,quantity_jun,quantity_jun,quantity_jun,quantity_jun,quantity_jun,quantity_jun,quantity_jun,quantity_jun,quantity_jun,quantity_jun,quantity_jun,quantity_june,quantity_jun,quantity_june,quantity_june,quantity_june,quantity_june,quantity_june,quantity_june +fuel_consumed_units_july,quantity_jul,quantity_jul,quantity_jul,quantity_jul,quantity_jul,quantity_jul,quantity_jul,quantity_jul,quantity_jul,quantity_jul,quantity_jul,quantity_july,quantity_jul,quantity_july,quantity_july,quantity_july,quantity_july,quantity_july,quantity_july +fuel_consumed_units_august,quantity_aug,quantity_aug,quantity_aug,quantity_aug,quantity_aug,quantity_aug,quantity_aug,quantity_aug,quantity_aug,quantity_aug,quantity_aug,quantity_august,quantity_aug,quantity_august,quantity_august,quantity_august,quantity_august,quantity_august,quantity_august +fuel_consumed_units_september,quantity_sep,quantity_sep,quantity_sep,quantity_sep,quantity_sep,quantity_sep,quantity_sep,quantity_sep,quantity_sep,quantity_sep,quantity_sep,quantity_september,quantity_sep,quantity_september,quantity_september,quantity_september,quantity_september,quantity_september,quantity_september +fuel_consumed_units_october,quantity_oct,quantity_oct,quantity_oct,quantity_oct,quantity_oct,quantity_oct,quantity_oct,quantity_oct,quantity_oct,quantity_oct,quantity_oct,quantity_october,quantity_oct,quantity_october,quantity_october,quantity_october,quantity_october,quantity_october,quantity_october +fuel_consumed_units_november,quantity_nov,quantity_nov,quantity_nov,quantity_nov,quantity_nov,quantity_nov,quantity_nov,quantity_nov,quantity_nov,quantity_nov,quantity_nov,quantity_november,quantity_nov,quantity_november,quantity_november,quantity_november,quantity_november,quantity_november,quantity_november 
+fuel_consumed_units_december,quantity_dec,quantity_dec,quantity_dec,quantity_dec,quantity_dec,quantity_dec,quantity_dec,quantity_dec,quantity_dec,quantity_dec,quantity_dec,quantity_december,quantity_dec,quantity_december,quantity_december,quantity_december,quantity_december,quantity_december,quantity_december +fuel_consumed_for_electricity_units_january,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_jan,elec_quantity_january,elec_quantity_jan,elec_quantity_january,elec_quantity_january,elec_quantity_january,elec_quantity_january,elec_quantity_january,elec_quantity_january +fuel_consumed_for_electricity_units_february,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_feb,elec_quantity_february,elec_quantity_feb,elec_quantity_february,elec_quantity_february,elec_quantity_february,elec_quantity_february,elec_quantity_february,elec_quantity_february +fuel_consumed_for_electricity_units_march,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_mar,elec_quantity_march,elec_quantity_mar,elec_quantity_march,elec_quantity_march,elec_quantity_march,elec_quantity_march,elec_quantity_march,elec_quantity_march +fuel_consumed_for_electricity_units_april,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_apr,elec_quantity_april,elec_quantity_apr,elec_quantity_april,elec_quantity_april,elec_quantity_april,elec_quantity_april,elec_quantity_april,elec_quantity_april 
+fuel_consumed_for_electricity_units_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may,elec_quantity_may +fuel_consumed_for_electricity_units_june,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_jun,elec_quantity_june,elec_quantity_jun,elec_quantity_june,elec_quantity_june,elec_quantity_june,elec_quantity_june,elec_quantity_june,elec_quantity_june +fuel_consumed_for_electricity_units_july,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_jul,elec_quantity_july,elec_quantity_jul,elec_quantity_july,elec_quantity_july,elec_quantity_july,elec_quantity_july,elec_quantity_july,elec_quantity_july +fuel_consumed_for_electricity_units_august,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_aug,elec_quantity_august,elec_quantity_aug,elec_quantity_august,elec_quantity_august,elec_quantity_august,elec_quantity_august,elec_quantity_august,elec_quantity_august 
+fuel_consumed_for_electricity_units_september,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_sep,elec_quantity_september,elec_quantity_sep,elec_quantity_september,elec_quantity_september,elec_quantity_september,elec_quantity_september,elec_quantity_september,elec_quantity_september +fuel_consumed_for_electricity_units_october,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_oct,elec_quantity_october,elec_quantity_oct,elec_quantity_october,elec_quantity_october,elec_quantity_october,elec_quantity_october,elec_quantity_october,elec_quantity_october +fuel_consumed_for_electricity_units_november,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_nov,elec_quantity_november,elec_quantity_nov,elec_quantity_november,elec_quantity_november,elec_quantity_november,elec_quantity_november,elec_quantity_november,elec_quantity_november +fuel_consumed_for_electricity_units_december,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_dec,elec_quantity_december,elec_quantity_dec,elec_quantity_december,elec_quantity_december,elec_quantity_december,elec_quantity_december,elec_quantity_december,elec_quantity_december 
+fuel_mmbtu_per_unit_january,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtu_per_unit_jan,mmbtuper_unit_jan,mmbtuper_unit_january,mmbtuper_unit_jan,mmbtuper_unit_january,mmbtuper_unit_january,mmbtuper_unit_january,mmbtuper_unit_january,mmbtuper_unit_january,mmbtuper_unit_january +fuel_mmbtu_per_unit_february,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtu_per_unit_feb,mmbtuper_unit_feb,mmbtuper_unit_february,mmbtuper_unit_feb,mmbtuper_unit_february,mmbtuper_unit_february,mmbtuper_unit_february,mmbtuper_unit_february,mmbtuper_unit_february,mmbtuper_unit_february +fuel_mmbtu_per_unit_march,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtu_per_unit_mar,mmbtuper_unit_mar,mmbtuper_unit_march,mmbtuper_unit_mar,mmbtuper_unit_march,mmbtuper_unit_march,mmbtuper_unit_march,mmbtuper_unit_march,mmbtuper_unit_march,mmbtuper_unit_march +fuel_mmbtu_per_unit_april,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtu_per_unit_apr,mmbtuper_unit_apr,mmbtuper_unit_april,mmbtuper_unit_apr,mmbtuper_unit_april,mmbtuper_unit_april,mmbtuper_unit_april,mmbtuper_unit_april,mmbtuper_unit_april,mmbtuper_unit_april +fuel_mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtu_per_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may,mmbtuper_unit_may 
+fuel_mmbtu_per_unit_june,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtu_per_unit_jun,mmbtuper_unit_jun,mmbtuper_unit_june,mmbtuper_unit_jun,mmbtuper_unit_june,mmbtuper_unit_june,mmbtuper_unit_june,mmbtuper_unit_june,mmbtuper_unit_june,mmbtuper_unit_june +fuel_mmbtu_per_unit_july,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtu_per_unit_jul,mmbtuper_unit_jul,mmbtuper_unit_july,mmbtuper_unit_jul,mmbtuper_unit_july,mmbtuper_unit_july,mmbtuper_unit_july,mmbtuper_unit_july,mmbtuper_unit_july,mmbtuper_unit_july +fuel_mmbtu_per_unit_august,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtu_per_unit_aug,mmbtuper_unit_aug,mmbtuper_unit_august,mmbtuper_unit_aug,mmbtuper_unit_august,mmbtuper_unit_august,mmbtuper_unit_august,mmbtuper_unit_august,mmbtuper_unit_august,mmbtuper_unit_august +fuel_mmbtu_per_unit_september,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtu_per_unit_sep,mmbtuper_unit_sep,mmbtuper_unit_september,mmbtuper_unit_sep,mmbtuper_unit_september,mmbtuper_unit_september,mmbtuper_unit_september,mmbtuper_unit_september,mmbtuper_unit_september,mmbtuper_unit_september 
+fuel_mmbtu_per_unit_october,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtu_per_unit_oct,mmbtuper_unit_oct,mmbtuper_unit_october,mmbtuper_unit_oct,mmbtuper_unit_october,mmbtuper_unit_october,mmbtuper_unit_october,mmbtuper_unit_october,mmbtuper_unit_october,mmbtuper_unit_october +fuel_mmbtu_per_unit_november,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtu_per_unit_nov,mmbtuper_unit_nov,mmbtuper_unit_november,mmbtuper_unit_nov,mmbtuper_unit_november,mmbtuper_unit_november,mmbtuper_unit_november,mmbtuper_unit_november,mmbtuper_unit_november,mmbtuper_unit_november +fuel_mmbtu_per_unit_december,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtu_per_unit_dec,mmbtuper_unit_dec,mmbtuper_unit_december,mmbtuper_unit_dec,mmbtuper_unit_december,mmbtuper_unit_december,mmbtuper_unit_december,mmbtuper_unit_december,mmbtuper_unit_december,mmbtuper_unit_december +fuel_consumed_mmbtu_january,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtu_jan,tot_mmbtujan,tot_mmbtu_january,tot_mmbtujan,tot_mmbtu_january,tot_mmbtu_january,tot_mmbtu_january,tot_mmbtu_january,tot_mmbtu_january,tot_mmbtu_january +fuel_consumed_mmbtu_february,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtu_feb,tot_mmbtufeb,tot_mmbtu_february,tot_mmbtufeb,tot_mmbtu_february,tot_mmbtu_february,tot_mmbtu_february,tot_mmbtu_february,tot_mmbtu_february,tot_mmbtu_february 
+fuel_consumed_mmbtu_march,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtu_mar,tot_mmbtumar,tot_mmbtu_march,tot_mmbtumar,tot_mmbtu_march,tot_mmbtu_march,tot_mmbtu_march,tot_mmbtu_march,tot_mmbtu_march,tot_mmbtu_march +fuel_consumed_mmbtu_april,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtu_apr,tot_mmbtuapr,tot_mmbtu_april,tot_mmbtuapr,tot_mmbtu_april,tot_mmbtu_april,tot_mmbtu_april,tot_mmbtu_april,tot_mmbtu_april,tot_mmbtu_april +fuel_consumed_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtumay,tot_mmbtu_may,tot_mmbtumay,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may,tot_mmbtu_may +fuel_consumed_mmbtu_june,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtu_jun,tot_mmbtujun,tot_mmbtu_june,tot_mmbtujun,tot_mmbtu_june,tot_mmbtu_june,tot_mmbtu_june,tot_mmbtu_june,tot_mmbtu_june,tot_mmbtu_june +fuel_consumed_mmbtu_july,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtu_jul,tot_mmbtujul,tot_mmbtu_july,tot_mmbtujul,tot_mmbtu_july,tot_mmbtu_july,tot_mmbtu_july,tot_mmbtu_july,tot_mmbtu_july,tot_mmbtu_july +fuel_consumed_mmbtu_august,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtu_aug,tot_mmbtuaug,tot_mmbtu_august,tot_mmbtuaug,tot_mmbtu_august,tot_mmbtu_august,tot_mmbtu_august,tot_mmbtu_august,tot_mmbtu_august,tot_mmbtu_august 
+fuel_consumed_mmbtu_september,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtu_sep,tot_mmbtusep,tot_mmbtu_september,tot_mmbtusep,tot_mmbtu_september,tot_mmbtu_september,tot_mmbtu_september,tot_mmbtu_september,tot_mmbtu_september,tot_mmbtu_september +fuel_consumed_mmbtu_october,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtu_oct,tot_mmbtuoct,tot_mmbtu_october,tot_mmbtuoct,tot_mmbtu_october,tot_mmbtu_october,tot_mmbtu_october,tot_mmbtu_october,tot_mmbtu_october,tot_mmbtu_october +fuel_consumed_mmbtu_november,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtu_nov,tot_mmbtunov,tot_mmbtu_november,tot_mmbtunov,tot_mmbtu_november,tot_mmbtu_november,tot_mmbtu_november,tot_mmbtu_november,tot_mmbtu_november,tot_mmbtu_november +fuel_consumed_mmbtu_december,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtu_dec,tot_mmbtudec,tot_mmbtu_december,tot_mmbtudec,tot_mmbtu_december,tot_mmbtu_december,tot_mmbtu_december,tot_mmbtu_december,tot_mmbtu_december,tot_mmbtu_december +fuel_consumed_for_electricity_mmbtu_january,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtus_jan,elec_mmbtujan,elec_mmbtu_january,elec_mmbtujan,elec_mmbtu_january,elec_mmbtu_january,elec_mmbtu_january,elec_mmbtu_january,elec_mmbtu_january,elec_mmbtu_january 
+fuel_consumed_for_electricity_mmbtu_february,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtus_feb,elec_mmbtufeb,elec_mmbtu_february,elec_mmbtufeb,elec_mmbtu_february,elec_mmbtu_february,elec_mmbtu_february,elec_mmbtu_february,elec_mmbtu_february,elec_mmbtu_february +fuel_consumed_for_electricity_mmbtu_march,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtus_mar,elec_mmbtumar,elec_mmbtu_march,elec_mmbtumar,elec_mmbtu_march,elec_mmbtu_march,elec_mmbtu_march,elec_mmbtu_march,elec_mmbtu_march,elec_mmbtu_march +fuel_consumed_for_electricity_mmbtu_april,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtus_apr,elec_mmbtuapr,elec_mmbtu_april,elec_mmbtuapr,elec_mmbtu_april,elec_mmbtu_april,elec_mmbtu_april,elec_mmbtu_april,elec_mmbtu_april,elec_mmbtu_april +fuel_consumed_for_electricity_mmbtu_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtus_may,elec_mmbtumay,elec_mmbtu_may,elec_mmbtumay,elec_mmbtu_may,elec_mmbtu_may,elec_mmbtu_may,elec_mmbtu_may,elec_mmbtu_may,elec_mmbtu_may +fuel_consumed_for_electricity_mmbtu_june,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtus_jun,elec_mmbtujun,elec_mmbtu_june,elec_mmbtujun,elec_mmbtu_june,elec_mmbtu_june,elec_mmbtu_june,elec_mmbtu_june,elec_mmbtu_june,elec_mmbtu_june 
+fuel_consumed_for_electricity_mmbtu_july,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtus_jul,elec_mmbtujul,elec_mmbtu_july,elec_mmbtujul,elec_mmbtu_july,elec_mmbtu_july,elec_mmbtu_july,elec_mmbtu_july,elec_mmbtu_july,elec_mmbtu_july +fuel_consumed_for_electricity_mmbtu_august,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtus_aug,elec_mmbtuaug,elec_mmbtu_august,elec_mmbtuaug,elec_mmbtu_august,elec_mmbtu_august,elec_mmbtu_august,elec_mmbtu_august,elec_mmbtu_august,elec_mmbtu_august +fuel_consumed_for_electricity_mmbtu_september,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtus_sep,elec_mmbtusep,elec_mmbtu_september,elec_mmbtusep,elec_mmbtu_september,elec_mmbtu_september,elec_mmbtu_september,elec_mmbtu_september,elec_mmbtu_september,elec_mmbtu_september +fuel_consumed_for_electricity_mmbtu_october,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtus_oct,elec_mmbtuoct,elec_mmbtu_october,elec_mmbtuoct,elec_mmbtu_october,elec_mmbtu_october,elec_mmbtu_october,elec_mmbtu_october,elec_mmbtu_october,elec_mmbtu_october +fuel_consumed_for_electricity_mmbtu_november,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtus_nov,elec_mmbtunov,elec_mmbtu_november,elec_mmbtunov,elec_mmbtu_november,elec_mmbtu_november,elec_mmbtu_november,elec_mmbtu_november,elec_mmbtu_november,elec_mmbtu_november 
+fuel_consumed_for_electricity_mmbtu_december,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtus_dec,elec_mmbtudec,elec_mmbtu_december,elec_mmbtudec,elec_mmbtu_december,elec_mmbtu_december,elec_mmbtu_december,elec_mmbtu_december,elec_mmbtu_december,elec_mmbtu_december +net_generation_mwh_january,netgen_jan,netgen_jan,netgen_jan,netgen_jan,netgen_jan,netgen_jan,netgen_jan,netgen_jan,netgen_jan,netgen_jan,netgen_jan,netgen_january,netgen_jan,netgen_january,netgen_january,netgen_january,netgen_january,netgen_january,netgen_january +net_generation_mwh_february,netgen_feb,netgen_feb,netgen_feb,netgen_feb,netgen_feb,netgen_feb,netgen_feb,netgen_feb,netgen_feb,netgen_feb,netgen_feb,netgen_february,netgen_feb,netgen_february,netgen_february,netgen_february,netgen_february,netgen_february,netgen_february +net_generation_mwh_march,netgen_mar,netgen_mar,netgen_mar,netgen_mar,netgen_mar,netgen_mar,netgen_mar,netgen_mar,netgen_mar,netgen_mar,netgen_mar,netgen_march,netgen_mar,netgen_march,netgen_march,netgen_march,netgen_march,netgen_march,netgen_march +net_generation_mwh_april,netgen_apr,netgen_apr,netgen_apr,netgen_apr,netgen_apr,netgen_apr,netgen_apr,netgen_apr,netgen_apr,netgen_apr,netgen_apr,netgen_april,netgen_apr,netgen_april,netgen_april,netgen_april,netgen_april,netgen_april,netgen_april +net_generation_mwh_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may,netgen_may +net_generation_mwh_june,netgen_jun,netgen_jun,netgen_jun,netgen_jun,netgen_jun,netgen_jun,netgen_jun,netgen_jun,netgen_jun,netgen_jun,netgen_jun,netgen_june,netgen_jun,netgen_june,netgen_june,netgen_june,netgen_june,netgen_june,netgen_june 
+net_generation_mwh_july,netgen_jul,netgen_jul,netgen_jul,netgen_jul,netgen_jul,netgen_jul,netgen_jul,netgen_jul,netgen_jul,netgen_jul,netgen_jul,netgen_july,netgen_jul,netgen_july,netgen_july,netgen_july,netgen_july,netgen_july,netgen_july +net_generation_mwh_august,netgen_aug,netgen_aug,netgen_aug,netgen_aug,netgen_aug,netgen_aug,netgen_aug,netgen_aug,netgen_aug,netgen_aug,netgen_aug,netgen_august,netgen_aug,netgen_august,netgen_august,netgen_august,netgen_august,netgen_august,netgen_august +net_generation_mwh_september,netgen_sep,netgen_sep,netgen_sep,netgen_sep,netgen_sep,netgen_sep,netgen_sep,netgen_sep,netgen_sep,netgen_sep,netgen_sep,netgen_september,netgen_sep,netgen_september,netgen_september,netgen_september,netgen_september,netgen_september,netgen_september +net_generation_mwh_october,netgen_oct,netgen_oct,netgen_oct,netgen_oct,netgen_oct,netgen_oct,netgen_oct,netgen_oct,netgen_oct,netgen_oct,netgen_oct,netgen_october,netgen_oct,netgen_october,netgen_october,netgen_october,netgen_october,netgen_october,netgen_october +net_generation_mwh_november,netgen_nov,netgen_nov,netgen_nov,netgen_nov,netgen_nov,netgen_nov,netgen_nov,netgen_nov,netgen_nov,netgen_nov,netgen_nov,netgen_november,netgen_nov,netgen_november,netgen_november,netgen_november,netgen_november,netgen_november,netgen_november +net_generation_mwh_december,netgen_dec,netgen_dec,netgen_dec,netgen_dec,netgen_dec,netgen_dec,netgen_dec,netgen_dec,netgen_dec,netgen_dec,netgen_dec,netgen_december,netgen_dec,netgen_december,netgen_december,netgen_december,netgen_december,netgen_december,netgen_december 
+total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity +electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity,electric_fuel_consumption_quantity +total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtus,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtu 
+elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu +net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours +report_year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/generator.csv b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/generator.csv index 179359765a..a0f3a1d6bb 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/generator.csv +++ b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/generator.csv @@ -1,28 +1,28 @@ -year_index,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id 
-combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant -plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id -plant_state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state -census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region -nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region -naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code -eia_sector,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,sector_number,eia_sector_number,sector_number,sector_number,sector_number,sector_number,sector_number,sector_number -sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name -generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id 
-prime_mover_code,prime_mover_type,prime_mover_type,prime_mover_type,prime_mover_type,reported_prime_mover,prime_mover_type,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover -net_generation_mwh_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january -net_generation_mwh_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february -net_generation_mwh_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march -net_generation_mwh_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april -net_generation_mwh_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may -net_generation_mwh_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june 
-net_generation_mwh_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july -net_generation_mwh_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august -net_generation_mwh_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september -net_generation_mwh_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october -net_generation_mwh_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november -net_generation_mwh_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december 
-net_generation_mwh_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date -report_year,year,year,year,year,year,year,year,year,year,year,year,year +year_index,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id +combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant +plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name +operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +plant_state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state +census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region +nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region 
+naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code +eia_sector,eia_sector_number,eia_sector_number,eia_sector_number,eia_sector_number,sector_number,eia_sector_number,sector_number,sector_number,sector_number,sector_number,sector_number,sector_number +sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector_name +generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id,generator_id +prime_mover_code,prime_mover_type,prime_mover_type,prime_mover_type,prime_mover_type,reported_prime_mover,prime_mover_type,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover,reported_prime_mover +net_generation_mwh_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january,net_generation_january +net_generation_mwh_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february,net_generation_february +net_generation_mwh_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march,net_generation_march 
+net_generation_mwh_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april,net_generation_april +net_generation_mwh_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may,net_generation_may +net_generation_mwh_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june,net_generation_june +net_generation_mwh_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july,net_generation_july +net_generation_mwh_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august,net_generation_august +net_generation_mwh_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september,net_generation_september +net_generation_mwh_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october,net_generation_october 
+net_generation_mwh_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november,net_generation_november +net_generation_mwh_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december,net_generation_december +net_generation_mwh_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date +report_year,year,year,year,year,year,year,year,year,year,year,year,year diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/plant_frame.csv b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/plant_frame.csv index 0093b22bb2..f3c12e2485 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/plant_frame.csv +++ b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/plant_frame.csv @@ -1,11 +1,11 @@ -year_index,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -report_year,,,year,year,year,year,year,year,year,year,year -report_month,,,,,,month,month,month,month,month, -plant_id_eia,,,eia_plant_id,plant_id,eia_plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id -plant_state,,,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state -eia_sector,,,sector,sector_number,sector,sector_number,sector_number,sector_number,sector_number,sector_number,sector_number 
-naics_code,,,north_american_industiral_classification_system_naics_code,naics_code,north_american_industrial_classification_system_naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code -plant_name_eia,,,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -combined_heat_power,,,combined_heat_and_power_status_y_chp_n_non_chp,combined_heat_and_power_status,combined_heat_and_power_status_y_chp_n_non_chp,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status -reporting_frequency,,,reporting_frequency_annual_or_monthly,reporting_frequency,reporting_frequency_annual_or_monthly,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency -nameplate_capacity_mw,,,nameplate_capacity_mw,,,,,,,, +year_index,2011,2012,2013,2014,2015,2016,2017,2018,2019 +report_year,year,year,year,year,year,year,year,year,year +report_month,,,,month,month,month,month,month, +plant_id_eia,eia_plant_id,plant_id,eia_plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id +plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state +eia_sector,sector,sector_number,sector,sector_number,sector_number,sector_number,sector_number,sector_number,sector_number +naics_code,north_american_industiral_classification_system_naics_code,naics_code,north_american_industrial_classification_system_naics_code,naics_code,naics_code,naics_code,naics_code,naics_code,naics_code +plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name 
+combined_heat_power,combined_heat_and_power_status_y_chp_n_non_chp,combined_heat_and_power_status,combined_heat_and_power_status_y_chp_n_non_chp,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status +reporting_frequency,reporting_frequency_annual_or_monthly,reporting_frequency,reporting_frequency_annual_or_monthly,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency +nameplate_capacity_mw,nameplate_capacity_mw,,,,,,,, diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/stocks.csv b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/stocks.csv index 1226f4ecb3..b21271a7a8 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/stocks.csv +++ b/src/pudl/package_data/meta/xlsx_maps/eia923/column_maps/stocks.csv @@ -1,38 +1,38 @@ -year_index,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -census_division_and_state,,,region_name,census_division_and_state,region_name,census_division_and_state,census_division_and_state,census_division_and_state,census_division_and_state,census_division_and_state,census_division_and_state -coal_january,coal_jan,coal_jan,coal_jan,coal_january,coal_jan,coal_january,coal_january,coal_january,coal_january,coal_january,coal_january -coal_february,coal_feb,coal_feb,coal_feb,coal_february,coal_feb,coal_february,coal_february,coal_february,coal_february,coal_february,coal_february -coal_march,coal_mar,coal_mar,coal_mar,coal_march,coal_mar,coal_march,coal_march,coal_march,coal_march,coal_march,coal_march -coal_april,coal_apr,coal_apr,coal_apr,coal_april,coal_apr,coal_april,coal_april,coal_april,coal_april,coal_april,coal_april -coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may 
-coal_june,coal_jun,coal_jun,coal_jun,coal_june,coal_jun,coal_june,coal_june,coal_june,coal_june,coal_june,coal_june -coal_july,coal_jul,coal_jul,coal_jul,coal_july,coal_jul,coal_july,coal_july,coal_july,coal_july,coal_july,coal_july -coal_august,coal_aug,coal_aug,coal_aug,coal_august,coal_aug,coal_august,coal_august,coal_august,coal_august,coal_august,coal_august -coal_september,coal_sep,coal_sep,coal_sep,coal_september,coal_sep,coal_september,coal_september,coal_september,coal_september,coal_september,coal_september -coal_october,coal_oct,coal_oct,coal_oct,coal_october,coal_oct,coal_october,coal_october,coal_october,coal_october,coal_october,coal_october -coal_november,coal_nov,coal_nov,coal_nov,coal_november,coal_nov,coal_november,coal_november,coal_november,coal_november,coal_november,coal_november -coal_december,coal_dec,coal_dec,coal_dec,coal_december,coal_dec,coal_december,coal_december,coal_december,coal_december,coal_december,coal_december -oil_january,oil_jan,oil_jan,oil_jan,oil_january,oil_jan,oil_january,oil_january,oil_january,oil_january,oil_january,oil_january -oil_february,oil_feb,oil_feb,oil_feb,oil_february,oil_feb,oil_february,oil_february,oil_february,oil_february,oil_february,oil_february -oil_march,oil_mar,oil_mar,oil_mar,oil_march,oil_mar,oil_march,oil_march,oil_march,oil_march,oil_march,oil_march -oil_april,oil_apr,oil_apr,oil_apr,oil_april,oil_apr,oil_april,oil_april,oil_april,oil_april,oil_april,oil_april -oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may -oil_june,oil_jun,oil_jun,oil_jun,oil_june,oil_jun,oil_june,oil_june,oil_june,oil_june,oil_june,oil_june -oil_july,oil_jul,oil_jul,oil_jul,oil_july,oil_jul,oil_july,oil_july,oil_july,oil_july,oil_july,oil_july -oil_august,oil_aug,oil_aug,oil_aug,oil_august,oil_aug,oil_august,oil_august,oil_august,oil_august,oil_august,oil_august 
-oil_september,oil_sep,oil_sep,oil_sep,oil_september,oil_sep,oil_september,oil_september,oil_september,oil_september,oil_september,oil_september -oil_october,oil_oct,oil_oct,oil_oct,oil_october,oil_oct,oil_october,oil_october,oil_october,oil_october,oil_october,oil_october -oil_november,oil_nov,oil_nov,oil_nov,oil_november,oil_nov,oil_november,oil_november,oil_november,oil_november,oil_november,oil_november -oil_december,oil_dec,oil_dec,oil_dec,oil_december,oil_dec,oil_december,oil_december,oil_december,oil_december,oil_december,oil_december -petcoke_january,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_january,petcoke_jan,petcoke_january,petcoke_january,petcoke_january,petcoke_january,petcoke_january,petcoke_january -petcoke_february,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_february,petcoke_feb,petcoke_february,petcoke_february,petcoke_february,petcoke_february,petcoke_february,petcoke_february -petcoke_march,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_march,petcoke_mar,petcoke_march,petcoke_march,petcoke_march,petcoke_march,petcoke_march,petcoke_march -petcoke_april,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_april,petcoke_apr,petcoke_april,petcoke_april,petcoke_april,petcoke_april,petcoke_april,petcoke_april -petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may -petcoke_june,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_june,petcoke_jun,petcoke_june,petcoke_june,petcoke_june,petcoke_june,petcoke_june,petcoke_june -petcoke_july,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_july,petcoke_jul,petcoke_july,petcoke_july,petcoke_july,petcoke_july,petcoke_july,petcoke_july -petcoke_august,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_august,petcoke_aug,petcoke_august,petcoke_august,petcoke_august,petcoke_august,petcoke_august,petcoke_august 
-petcoke_september,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_september,petcoke_sep,petcoke_september,petcoke_september,petcoke_september,petcoke_september,petcoke_september,petcoke_september -petcoke_october,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_october,petcoke_oct,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october -petcoke_november,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_november,petcoke_nov,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november -petcoke_december,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_december,petcoke_dec,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december +year_index,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +census_division_and_state,,,,,,,,,,,region_name,census_division_and_state,region_name,census_division_and_state,census_division_and_state,census_division_and_state,census_division_and_state,census_division_and_state,census_division_and_state +coal_january,coal_jan,coal_jan,coal_jan,coal_jan,coal_jan,coal_jan,coal_jan,coal_jan,coal_jan,coal_jan,coal_jan,coal_january,coal_jan,coal_january,coal_january,coal_january,coal_january,coal_january,coal_january +coal_february,coal_feb,coal_feb,coal_feb,coal_feb,coal_feb,coal_feb,coal_feb,coal_feb,coal_feb,coal_feb,coal_feb,coal_february,coal_feb,coal_february,coal_february,coal_february,coal_february,coal_february,coal_february +coal_march,coal_mar,coal_mar,coal_mar,coal_mar,coal_mar,coal_mar,coal_mar,coal_mar,coal_mar,coal_mar,coal_mar,coal_march,coal_mar,coal_march,coal_march,coal_march,coal_march,coal_march,coal_march +coal_april,coal_apr,coal_apr,coal_apr,coal_apr,coal_apr,coal_apr,coal_apr,coal_apr,coal_apr,coal_apr,coal_apr,coal_april,coal_apr,coal_april,coal_april,coal_april,coal_april,coal_april,coal_april 
+coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may,coal_may +coal_june,coal_jun,coal_jun,coal_jun,coal_jun,coal_jun,coal_jun,coal_jun,coal_jun,coal_jun,coal_jun,coal_jun,coal_june,coal_jun,coal_june,coal_june,coal_june,coal_june,coal_june,coal_june +coal_july,coal_jul,coal_jul,coal_jul,coal_jul,coal_jul,coal_jul,coal_jul,coal_jul,coal_jul,coal_jul,coal_jul,coal_july,coal_jul,coal_july,coal_july,coal_july,coal_july,coal_july,coal_july +coal_august,coal_aug,coal_aug,coal_aug,coal_aug,coal_aug,coal_aug,coal_aug,coal_aug,coal_aug,coal_aug,coal_aug,coal_august,coal_aug,coal_august,coal_august,coal_august,coal_august,coal_august,coal_august +coal_september,coal_sep,coal_sep,coal_sep,coal_sep,coal_sep,coal_sep,coal_sep,coal_sep,coal_sep,coal_sep,coal_sep,coal_september,coal_sep,coal_september,coal_september,coal_september,coal_september,coal_september,coal_september +coal_october,coal_oct,coal_oct,coal_oct,coal_oct,coal_oct,coal_oct,coal_oct,coal_oct,coal_oct,coal_oct,coal_oct,coal_october,coal_oct,coal_october,coal_october,coal_october,coal_october,coal_october,coal_october +coal_november,coal_nov,coal_nov,coal_nov,coal_nov,coal_nov,coal_nov,coal_nov,coal_nov,coal_nov,coal_nov,coal_nov,coal_november,coal_nov,coal_november,coal_november,coal_november,coal_november,coal_november,coal_november +coal_december,coal_dec,coal_dec,coal_dec,coal_dec,coal_dec,coal_dec,coal_dec,coal_dec,coal_dec,coal_dec,coal_dec,coal_december,coal_dec,coal_december,coal_december,coal_december,coal_december,coal_december,coal_december +oil_january,oil_jan,oil_jan,oil_jan,oil_jan,oil_jan,oil_jan,oil_jan,oil_jan,oil_jan,oil_jan,oil_jan,oil_january,oil_jan,oil_january,oil_january,oil_january,oil_january,oil_january,oil_january 
+oil_february,oil_feb,oil_feb,oil_feb,oil_feb,oil_feb,oil_feb,oil_feb,oil_feb,oil_feb,oil_feb,oil_feb,oil_february,oil_feb,oil_february,oil_february,oil_february,oil_february,oil_february,oil_february +oil_march,oil_mar,oil_mar,oil_mar,oil_mar,oil_mar,oil_mar,oil_mar,oil_mar,oil_mar,oil_mar,oil_mar,oil_march,oil_mar,oil_march,oil_march,oil_march,oil_march,oil_march,oil_march +oil_april,oil_apr,oil_apr,oil_apr,oil_apr,oil_apr,oil_apr,oil_apr,oil_apr,oil_apr,oil_apr,oil_apr,oil_april,oil_apr,oil_april,oil_april,oil_april,oil_april,oil_april,oil_april +oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may,oil_may +oil_june,oil_jun,oil_jun,oil_jun,oil_jun,oil_jun,oil_jun,oil_jun,oil_jun,oil_jun,oil_jun,oil_jun,oil_june,oil_jun,oil_june,oil_june,oil_june,oil_june,oil_june,oil_june +oil_july,oil_jul,oil_jul,oil_jul,oil_jul,oil_jul,oil_jul,oil_jul,oil_jul,oil_jul,oil_jul,oil_jul,oil_july,oil_jul,oil_july,oil_july,oil_july,oil_july,oil_july,oil_july +oil_august,oil_aug,oil_aug,oil_aug,oil_aug,oil_aug,oil_aug,oil_aug,oil_aug,oil_aug,oil_aug,oil_aug,oil_august,oil_aug,oil_august,oil_august,oil_august,oil_august,oil_august,oil_august +oil_september,oil_sep,oil_sep,oil_sep,oil_sep,oil_sep,oil_sep,oil_sep,oil_sep,oil_sep,oil_sep,oil_sep,oil_september,oil_sep,oil_september,oil_september,oil_september,oil_september,oil_september,oil_september +oil_october,oil_oct,oil_oct,oil_oct,oil_oct,oil_oct,oil_oct,oil_oct,oil_oct,oil_oct,oil_oct,oil_oct,oil_october,oil_oct,oil_october,oil_october,oil_october,oil_october,oil_october,oil_october +oil_november,oil_nov,oil_nov,oil_nov,oil_nov,oil_nov,oil_nov,oil_nov,oil_nov,oil_nov,oil_nov,oil_nov,oil_november,oil_nov,oil_november,oil_november,oil_november,oil_november,oil_november,oil_november 
+oil_december,oil_dec,oil_dec,oil_dec,oil_dec,oil_dec,oil_dec,oil_dec,oil_dec,oil_dec,oil_dec,oil_dec,oil_december,oil_dec,oil_december,oil_december,oil_december,oil_december,oil_december,oil_december +petcoke_january,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_jan,petcoke_january,petcoke_jan,petcoke_january,petcoke_january,petcoke_january,petcoke_january,petcoke_january,petcoke_january +petcoke_february,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_feb,petcoke_february,petcoke_feb,petcoke_february,petcoke_february,petcoke_february,petcoke_february,petcoke_february,petcoke_february +petcoke_march,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_mar,petcoke_march,petcoke_mar,petcoke_march,petcoke_march,petcoke_march,petcoke_march,petcoke_march,petcoke_march +petcoke_april,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_apr,petcoke_april,petcoke_apr,petcoke_april,petcoke_april,petcoke_april,petcoke_april,petcoke_april,petcoke_april +petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may,petcoke_may +petcoke_june,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_jun,petcoke_june,petcoke_jun,petcoke_june,petcoke_june,petcoke_june,petcoke_june,petcoke_june,petcoke_june 
+petcoke_july,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_jul,petcoke_july,petcoke_jul,petcoke_july,petcoke_july,petcoke_july,petcoke_july,petcoke_july,petcoke_july +petcoke_august,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_aug,petcoke_august,petcoke_aug,petcoke_august,petcoke_august,petcoke_august,petcoke_august,petcoke_august,petcoke_august +petcoke_september,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_september,petcoke_sep,petcoke_september,petcoke_september,petcoke_september,petcoke_september,petcoke_september,petcoke_september +petcoke_october,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_october,petcoke_oct,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october +petcoke_november,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_november,petcoke_nov,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november +petcoke_december,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_december,petcoke_dec,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/file_map.csv b/src/pudl/package_data/meta/xlsx_maps/eia923/file_map.csv index ed67a4385f..92692723e5 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/file_map.csv +++ b/src/pudl/package_data/meta/xlsx_maps/eia923/file_map.csv @@ -1,12 +1,12 @@ 
-page,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -boiler_fuel,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -coal_stocks,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -energy_storage,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -fuel_receipts_costs,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 
2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -generation_fuel,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -generator,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -oil_stocks,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 
2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -petcoke_stocks,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -plant_frame,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -puerto_rico,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 
2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx -stocks,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx +page,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +boiler_fuel,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx +coal_stocks,-1,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 
05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx +energy_storage,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx +fuel_receipts_costs,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx +generation_fuel,f906920y2001.xls,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 
2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx +generator,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx +oil_stocks,-1,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx 
+petcoke_stocks,-1,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx +plant_frame,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx +puerto_rico,-1,-1,-1,-1,-1,-1,-1,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx 
+stocks,f906920y2001.xls,f906920y2002.xls,f906920_2003.xls,f906920_2004.xls,f906920_2005.xls,f906920_2006.xls,f906920_2007.xls,eia923December2008.xls,EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS,EIA923 SCHEDULES 2_3_4_5 Final 2010.xls,EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx,EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/skipfooter.csv b/src/pudl/package_data/meta/xlsx_maps/eia923/skipfooter.csv index c5b4d22c06..511bda337e 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/skipfooter.csv +++ b/src/pudl/package_data/meta/xlsx_maps/eia923/skipfooter.csv @@ -1,12 +1,12 @@ -year_index,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -generation_fuel,0,0,0,0,0,0,0,0,0,0,0 -puerto_rico,0,0,0,0,0,0,0,0,0,0,0 -stocks,0,0,0,0,0,0,0,0,0,0,0 -oil_stocks,0,0,0,0,0,0,0,0,0,0,0 -coal_stocks,0,0,0,0,0,0,0,0,0,0,0 -petcoke_stocks,0,0,0,0,0,0,0,0,0,0,0 -energy_storage,0,0,0,0,0,0,0,0,0,0,0 -boiler_fuel,0,0,0,0,0,0,0,0,0,0,0 -generator,0,0,0,0,0,0,0,0,0,0,0 -fuel_receipts_costs,0,0,0,0,0,0,0,0,0,0,0 -plant_frame,0,0,0,0,0,0,0,0,0,0,0 +year_index,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +generation_fuel,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +puerto_rico,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +stocks,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +oil_stocks,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +coal_stocks,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +petcoke_stocks,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +energy_storage,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 
+boiler_fuel,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +generator,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +fuel_receipts_costs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +plant_frame,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/skiprows.csv b/src/pudl/package_data/meta/xlsx_maps/eia923/skiprows.csv index e6897791ce..837b19bbb9 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/skiprows.csv +++ b/src/pudl/package_data/meta/xlsx_maps/eia923/skiprows.csv @@ -1,12 +1,12 @@ -year_index,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -generation_fuel,7,7,5,5,5,5,5,5,5,5,5 -puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,5,5,6 -stocks,7,7,5,5,5,5,5,5,5,5,5 -oil_stocks,5,5,5,5,5,5,5,5,5,5,5 -coal_stocks,5,5,5,5,5,5,5,5,5,-1,-1 -petcoke_stocks,5,5,5,5,5,5,5,5,5,-1,-1 -energy_storage,-1,-1,-1,-1,-1,-1,-1,-1,-1,5,5 -boiler_fuel,7,7,5,5,5,5,5,5,5,5,5 -generator,7,7,5,5,5,5,5,5,5,5,5 -fuel_receipts_costs,6,7,4,4,4,4,4,4,4,4,4 -plant_frame,-1,-1,4,4,4,4,4,4,4,4,4 +year_index,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +generation_fuel,7,7,7,7,7,7,7,7,7,7,5,5,5,5,5,5,5,5,5 +puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,5,5,6 +stocks,7,7,7,7,7,7,7,7,7,7,5,5,5,5,5,5,5,5,5 +oil_stocks,-1,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +coal_stocks,-1,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,-1,-1 +petcoke_stocks,-1,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,-1,-1 +energy_storage,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,5,5 +boiler_fuel,-1,-1,-1,-1,-1,-1,-1,7,7,7,5,5,5,5,5,5,5,5,5 +generator,-1,-1,-1,-1,-1,-1,-1,7,7,7,5,5,5,5,5,5,5,5,5 +fuel_receipts_costs,-1,-1,-1,-1,-1,-1,-1,7,6,7,4,4,4,4,4,4,4,4,4 +plant_frame,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,4,4,4,4,4,4,4,4,4 diff --git a/src/pudl/package_data/meta/xlsx_maps/eia923/tab_map.csv b/src/pudl/package_data/meta/xlsx_maps/eia923/tab_map.csv index 94ad020b08..afe0de9045 100644 --- a/src/pudl/package_data/meta/xlsx_maps/eia923/tab_map.csv +++ 
b/src/pudl/package_data/meta/xlsx_maps/eia923/tab_map.csv @@ -1,13 +1,13 @@ -year_index,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 -generation_fuel,0,0,0,0,0,0,0,0,0,0,0 -puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,1,1,1 -stocks,1,1,1,1,1,1,1,1,2,3,3 -oil_stocks,2,2,2,2,2,2,2,2,3,4,4 -coal_stocks,3,3,3,3,3,3,3,3,4,5,-1 -petcoke_stocks,4,4,4,4,4,4,4,4,5,6,-1 -energy_storage,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,2 -boiler_fuel,5,5,5,5,5,5,5,5,6,7,5 -generator,6,6,6,6,6,6,6,6,7,8,6 -fuel_receipts_costs,7,7,7,7,7,7,7,7,8,9,7 -plant_frame,-1,-1,8,8,8,8,8,8,9,10,8 -plant_frame_puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,9 +year_index,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 +generation_fuel,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,1,1 +stocks,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,3,3 +oil_stocks,-1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,4,4 +coal_stocks,-1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,5,-1 +petcoke_stocks,-1,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,6,-1 +energy_storage,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,2 +boiler_fuel,-1,-1,-1,-1,-1,-1,-1,5,5,5,5,5,5,5,5,5,6,7,5 +generator,-1,-1,-1,-1,-1,-1,-1,6,6,6,6,6,6,6,6,6,7,8,6 +fuel_receipts_costs,-1,-1,-1,-1,-1,-1,-1,7,7,7,7,7,7,7,7,7,8,9,7 +plant_frame,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,8,8,8,8,8,8,9,10,8 +plant_frame_puerto_rico,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,9 diff --git a/src/pudl/package_data/settings/etl_full.yml b/src/pudl/package_data/settings/etl_full.yml index f2589908a5..5b05742f50 100644 --- a/src/pudl/package_data/settings/etl_full.yml +++ b/src/pudl/package_data/settings/etl_full.yml @@ -176,8 +176,8 @@ datapkg_bundle_settings: - coalmine_eia923 # REQUIRES fuel_receipts_costs_eia923 - fuel_receipts_costs_eia923 eia923_years: [ - 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, - 2019 + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, + 2011, 2012, 2013, 2014, 2015, 2016, 
2017, 2018, 2019 ] eia860_tables: - boiler_generator_assn_eia860 @@ -219,8 +219,8 @@ datapkg_bundle_settings: - coalmine_eia923 # REQUIRES fuel_receipts_costs_eia923 - fuel_receipts_costs_eia923 eia923_years: [ - 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, - 2019 + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, + 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 ] # See notes above about the entanglement between EIA 923 and EIA 860. # It's best to load all the tables from both of them, or neither of @@ -245,5 +245,5 @@ datapkg_bundle_settings: epacems_years: [ 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, - 2015, 2016, 2017, 2018, 2019, + 2015, 2016, 2017, 2018, 2019, 2020 ] diff --git a/src/pudl/transform/eia860.py b/src/pudl/transform/eia860.py index 4fd0a498e8..4a9a3fbe49 100644 --- a/src/pudl/transform/eia860.py +++ b/src/pudl/transform/eia860.py @@ -65,12 +65,12 @@ def ownership(eia860_dfs, eia860_transformed_dfs): # This has to come before the fancy indexing below, otherwise the plant_id_eia # is still a float. 
- own_df = own_df.astype({ - "owner_utility_id_eia": pd.Int64Dtype(), - "utility_id_eia": pd.Int64Dtype(), - "plant_id_eia": pd.Int64Dtype(), - "owner_state": pd.StringDtype() - }) + own_df = own_df.astype(pudl.helpers.get_pudl_dtypes({ + "owner_utility_id_eia": "eia", + "utility_id_eia": "eia", + "plant_id_eia": "eia", + "owner_state": "eia", + })) # A small number of generators are reported multiple times in the ownership # table due to the use of leading zeroes in their integer generator_id values @@ -582,8 +582,8 @@ def transform(eia860_raw_dfs, eia860_tables=pc.pudl_tables["eia860"]): # for each of the tables, run the respective transform funtction for table in eia860_transform_functions: if table in eia860_tables: - logger.info(f"Transforming raw EIA 860 DataFrames for {table} " - f"concatenated across all years.") + logger.info("Transforming raw EIA 860 DataFrames for %s " + "concatenated across all years.", table) eia860_transform_functions[table](eia860_raw_dfs, eia860_transformed_dfs) diff --git a/src/pudl/transform/eia861.py b/src/pudl/transform/eia861.py index 8b1adce849..2b25616364 100644 --- a/src/pudl/transform/eia861.py +++ b/src/pudl/transform/eia861.py @@ -236,11 +236,11 @@ "balancing_authority_name_eia", # We have this ]) .assign(report_date=lambda x: pd.to_datetime(x.report_date)) - .astype({ - "utility_id_eia": pd.Int64Dtype(), - "balancing_authority_id_eia": pd.Int64Dtype(), - "balancing_authority_name_eia": pd.StringDtype(), - }) + .astype(pudl.helpers.get_pudl_dtypes({ + "utility_id_eia": "eia", + "balancing_authority_id_eia": "eia", + "balancing_authority_name_eia": "eia", + })) .dropna(subset=["report_date", "balancing_authority_name_eia", "utility_id_eia"]) .set_index(["report_date", "balancing_authority_name_eia", "utility_id_eia"]) ) @@ -495,7 +495,7 @@ def _tidy_class_dfs(df, df_name, idx_cols, class_list, class_type, keep_totals=F ) raw_df = ( df.dropna(subset=["utility_id_eia"]) - .astype({"utility_id_eia": pd.Int64Dtype()}) + 
.astype(pudl.helpers.get_pudl_dtypes({"utility_id_eia": "eia"})) .set_index(idx_cols) ) # Split the table into index, data, and "denormalized" columns for processing: @@ -955,7 +955,7 @@ def balancing_authority_assn(tfr_dfs): tfr_dfs["balancing_authority_assn_eia861"] = ( pd.concat([early_date_ba_util_state, late_date_ba_util_state]) .dropna(subset=["balancing_authority_id_eia", ]) - .astype({"utility_id_eia": pd.Int64Dtype()}) + .astype(pudl.helpers.get_pudl_dtypes({"utility_id_eia": "eia"})) ) return tfr_dfs diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 22cc4f918b..327261fcb4 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -15,7 +15,7 @@ ############################################################################### -def _yearly_to_monthly_records(df, md): +def _yearly_to_monthly_records(df): """Converts an EIA 923 record of 12 months of data into 12 monthly records. Much of the data reported in EIA 923 is monthly, but all 12 months worth of data is @@ -27,44 +27,48 @@ def _yearly_to_monthly_records(df, md): Args: df (pandas.DataFrame): A pandas DataFrame containing the annual data to be converted into monthly records. - md (dict): a dictionary with the integers 1-12 as keys, and the patterns used - to match field names for each of the months as values. These patterns are - also used to rename the columns in the dataframe which is returned, so they - need to match the entire portion of the column name that is month specific. Returns: pandas.DataFrame: A dataframe containing the same data as was passed in via df, - but with monthly records instead of annual records. + but with monthly records as rows instead of as columns. """ - yearly = df.copy() - all_years = pd.DataFrame() - - for y in yearly.report_year.unique(): - this_year = yearly[yearly.report_year == y].copy() - monthly = pd.DataFrame() - for m in md: - # Grab just the columns for the month we're working on. 
- this_month = this_year.filter(regex=md[m]).copy() - # Drop this month's data from the yearly data frame. - this_year.drop(this_month.columns, axis=1, inplace=True) - # Rename this month's columns to get rid of the month reference. - this_month.columns = this_month.columns.str.replace( - md[m], '', regex=True) - # Add a numerical month column corresponding to this month. - this_month['report_month'] = m - # Add this month's data to the monthly DataFrame we're building. - monthly = pd.concat([monthly, this_month], sort=True) - - # Merge the monthly data we've built up with the remaining fields in - # the data frame we started with -- all of which should be independent - # of the month, and apply across all 12 of the monthly records created - # from each of the # initial annual records. - this_year = this_year.merge(monthly, left_index=True, right_index=True) - # Add this new year's worth of data to the big dataframe we'll return - all_years = pd.concat([all_years, this_year], sort=True) - - return all_years + month_dict = { + 'january': 1, + 'february': 2, + 'march': 3, + 'april': 4, + 'may': 5, + 'june': 6, + 'july': 7, + 'august': 8, + 'september': 9, + 'october': 10, + 'november': 11, + 'december': 12 + } + multi_idx = df.columns.str.rsplit( + "_", n=1, expand=True).set_names([None, 'report_month']) + ends_with_month_filter = multi_idx.get_level_values( + 'report_month').isin(set(month_dict.keys())) + if not ends_with_month_filter.any(): + return df + index_cols = df.columns[~ends_with_month_filter] + # performance note: this was good enough for eia923 data size. + # Using .set_index() is simple but inefficient due to unnecessary index creation. + # Performance may be improved by separating into two dataframes, + # .stack()ing the monthly data, then joining back together on the original index.
+ df = df.set_index(list(index_cols), append=True) + # convert month names to numbers (january -> 1) + col_df = multi_idx[ends_with_month_filter].to_frame(index=False) + col_df.loc[:, 'report_month'] = col_df.loc[:, 'report_month'].map(month_dict) + month_idx = pd.MultiIndex.from_frame(col_df).set_names([None, 'report_month']) + # reshape + df.columns = month_idx + df = df.stack() + # restore original index and columns - reset index except level 0 + df = df.reset_index(level=list(range(1, df.index.nlevels))) + return df def _coalmine_cleanup(cmi_df): @@ -252,13 +256,30 @@ def generation_fuel(eia923_dfs, eia923_transformed_dfs): gf_df.drop(cols_to_drop, axis=1, inplace=True) # Convert the EIA923 DataFrame from yearly to monthly records. - gf_df = _yearly_to_monthly_records(gf_df, pc.month_dict_eia923) + gf_df = _yearly_to_monthly_records(gf_df) # Replace the EIA923 NA value ('.') with a real NA value. gf_df = pudl.helpers.fix_eia_na(gf_df) # Remove "State fuel-level increment" records... which don't pertain to # any particular plant (they have plant_id_eia == operator_id == 99999) gf_df = gf_df[gf_df.plant_id_eia != 99999] + # conservative manual correction for bad prime mover codes + gf_df['prime_mover_code'] = gf_df['prime_mover_code'].replace({ + 'CC': '' # one plant in 2004. Pre-2004, it was '', post-2004, it was broken into combined cycle parts + }) + + # conservative manual corrections for misplaced or mistyped fuel types + gf_df['fuel_type'] = gf_df['fuel_type'].replace({ + # mistyped, 1 record in 2002 (as of 2019 data) + 'OW': 'WO', + # duplicated AER fuel code, subtype not reported. One record in 2001 (as of 2019 data) + 'COL': '', + # duplicated AER fuel code, maps unambiguously to 'wat'. 4 records in 2001 (as of 2019 data) + 'HPS': 'WAT', + # duplicated AER fuel code, subtype not reported. 
12 records in 2001 (as of 2019 data) + 'OOG': '', + }) + gf_df['fuel_type_code_pudl'] = ( pudl.helpers.cleanstrings_series(gf_df.fuel_type, pc.fuel_type_eia923_gen_fuel_simple_map) @@ -318,7 +339,7 @@ def boiler_fuel(eia923_dfs, eia923_transformed_dfs): bf_df.dropna(subset=['boiler_id', 'plant_id_eia'], inplace=True) # Convert the EIA923 DataFrame from yearly to monthly records. - bf_df = _yearly_to_monthly_records(bf_df, pc.month_dict_eia923) + bf_df = _yearly_to_monthly_records(bf_df) bf_df['fuel_type_code_pudl'] = pudl.helpers.cleanstrings_series( bf_df.fuel_type_code, pc.fuel_type_eia923_boiler_fuel_simple_map) @@ -372,7 +393,7 @@ def generation(eia923_dfs, eia923_transformed_dfs): 'sector_name', 'net_generation_mwh_year_to_date'], axis="columns") - .pipe(_yearly_to_monthly_records, pc.month_dict_eia923) + .pipe(_yearly_to_monthly_records) .pipe(pudl.helpers.fix_eia_na) .pipe(pudl.helpers.convert_to_date) ) @@ -540,6 +561,10 @@ def fuel_receipts_costs(eia923_dfs, eia923_transformed_dfs): x.primary_transportation_mode_code.str.upper()), secondary_transportation_mode_code=lambda x: ( x.secondary_transportation_mode_code.str.upper()), + # convert contract type "N" to "NC". The "N" code existed only + # in 2008, the first year contracts were reported to the EIA, + # before being replaced by "NC". 
+ contract_type_code=lambda x: x.contract_type_code.replace({'N': 'NC'}), fuel_cost_per_mmbtu=lambda x: x.fuel_cost_per_mmbtu / 100, fuel_group_code=lambda x: ( x.fuel_group_code.str.lower().str.replace(' ', '_')), diff --git a/src/pudl/transform/epacems.py b/src/pudl/transform/epacems.py index bd2ae49bdb..2b70c5cd89 100644 --- a/src/pudl/transform/epacems.py +++ b/src/pudl/transform/epacems.py @@ -88,7 +88,11 @@ def _load_plant_utc_offset(datapkg_dir): pd.read_csv( pathlib.Path(datapkg_dir, 'data/plants_entity_eia.csv'), usecols=["plant_id_eia", "timezone"], - dtype={"plant_id_eia": "Int64", "timezone": pd.StringDtype()}) + dtype=pudl.helpers.get_pudl_dtypes({ + "plant_id_eia": "eia", + "timezone": "eia", + }) + ) .replace(to_replace="None", value=pd.NA) .dropna() ) diff --git a/src/pudl/validate.py b/src/pudl/validate.py index b931ed4bb5..f49e7f2d45 100644 --- a/src/pudl/validate.py +++ b/src/pudl/validate.py @@ -21,6 +21,124 @@ logger = logging.getLogger(__name__) +def intersect_indexes(indexes): + """ + Calculate the intersection of a collection of pandas Indexes. + + Args: + indexes (iterable of pandas.Index objects): + + Returns: + pandas.Index: The intersection of all values found in the input + indexes. + + """ + shared_idx = indexes[0] + for idx in indexes: + shared_idx = shared_idx.intersection(idx, sort=None) + return shared_idx + + +def check_date_freq(df1, df2, mult): + """ + Verify an expected relationship between time frequencies of two dataframes. + + Identify all distinct values of ``report_date`` in each of the input + dataframes and check that the number of distinct ``report_date`` values in + ``df2`` is ``mult`` times the number of ``report_date`` values in ``df1`` + across only those years which appear in both dataframes. This is primarily + aimed at comparing annual and monthly dataframes, but should + also work with e.g. annual (df1) and quarterly (df2) frequency data using + ``mult=4``. 
+ + Note the function assumes that a dataframe with sub-annual frequency will + cover the entire year it's part of. If you have a partial year of monthly + data in one dataframe that overlaps with annual data in another dataframe + you'll probably get unexpected behavior. + + We use this method rather than attempting to infer a frequency from the + observed values because often we have only a single year of data, and you + need at least 3 values in a DatetimeIndex to infer the frequency. + + Args: + df1 (pandas.DataFrame): A dataframe with a column named ``report_date`` + which contains dates. + df2 (pandas.DataFrame): A dataframe with a column named ``report_date`` + which contains dates. + frequency. + mult (int): A multiplicative factor indicating the expected ratio + between the number of distinct date values found in ``df1`` and + ``df2``. E.g. if ``df1`` is annual and ``df2`` is monthly, ``mult`` + should be 12. + + Returns: + None + + Raises: + AssertionError: if the number of distinct ``report_date`` values in + ``df2`` is not ``mult`` times the number of distinct + ``report_date`` values in ``df1``. + ValueError: if either ``df1`` or ``df2`` does not have a + column named ``report_date`` + + """ + if ( + ("report_date" not in df1.columns) + or ("report_date" not in df2.columns) + ): + raise ValueError( + "Missing report_date column in one or both input DataFrames" + ) + + idx1 = pd.DatetimeIndex(df1.report_date.unique()) + idx2 = pd.DatetimeIndex(df2.report_date.unique()) + + overlap = intersect_indexes([idx1, idx2]) + overlap1 = [d for d in idx1 if d.year in overlap.year] + overlap2 = [d for d in idx2 if d.year in overlap.year] + + n1 = len(overlap1) + n2 = len(overlap2) + if mult * n1 != n2: + raise AssertionError( + f"Expected ratio of distinct report_date values to be {mult}, " + f"but found {n2} / {n1} = {n2/n1}" + ) + + +def no_null_rows(df, cols="all", df_name="", thresh=0.9): + """ + Check for rows filled with NA values indicating bad merges. 
+ + Sum up the number of NA values in each row and the columns specified by + ``cols``. If the NA values make up more than ``thresh`` of the columns + overall, the row is considered Null and the check fails. + + Args: + df (pandas.DataFrame): DataFrame to check for null rows. + cols (iterable or "all"): The labels of columns to check for + all-null values. If "all" check all columns. + + Returns: + pandas.DataFrame: The input DataFrame, for use with DataFrame.pipe(). + + Raises: + ValueError: If the fraction of NA values in any row is greater than + ``thresh``. + + """ + if cols == "all": + cols = df.columns + + null_rows = df[cols].isna().sum(axis="columns") / len(cols) > thresh + if null_rows.any(): + raise ValueError( + f"Found {null_rows.sum(axis='rows')} Null rows in {df_name}." + ) + + return df + + def no_null_cols(df, cols="all", df_name=""): """Check that a dataframe has no all-NaN columns. @@ -45,9 +163,9 @@ def no_null_cols(df, cols="all", df_name=""): if cols == "all": cols = df.columns - for c in cols: - if df[c].isna().all(): - raise ValueError(f"Null column: {c} found in dataframe {df_name}") + null_cols = [c for c in cols if c in df.columns and df[c].isna().all()] + if null_cols: + raise ValueError(f"Null columns found in {df_name}: {null_cols}") return df @@ -56,11 +174,15 @@ def check_max_rows(df, expected_rows=np.inf, margin=0.05, df_name=""): """Validate that a dataframe has less than a maximum number of rows.""" len_df = len(df) max_rows = expected_rows * (1 + margin) + pct_off = (len_df - expected_rows) / expected_rows + msg = ( + f"{df_name}: found {len_df} rows, expected {expected_rows}. 
" + f"Off by {pct_off:.3%}, allowed margin of {margin:.3%}" + ) + if len_df > max_rows: - raise ValueError( - f"Too many records ({len_df}>{max_rows}) in dataframe {df_name}") - logger.info(f"{df_name}: expected {expected_rows} rows, " - f"found {len_df} rows.") + raise ValueError(msg) + logger.info(msg) return df @@ -69,11 +191,15 @@ def check_min_rows(df, expected_rows=0, margin=0.05, df_name=""): """Validate that a dataframe has a certain minimum number of rows.""" len_df = len(df) min_rows = expected_rows / (1 + margin) + pct_off = (len_df - expected_rows) / expected_rows + msg = ( + f"{df_name}: found {len_df} rows, expected {expected_rows}. " + f"Off by {pct_off:.3%}, allowed margin of {margin:.3%}" + ) + if len_df < min_rows: - raise ValueError( - f"Too few records ({len_df}<{min_rows}) in dataframe {df_name}") - logger.info(f"{df_name}: expected {expected_rows} rows, " - f"found {len_df} rows.") + raise ValueError(msg) + logger.info(msg) return df @@ -1168,7 +1294,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_q": 0.05, "low_bound": 5.0, "hi_q": 0.95, - "hi_bound": 6.5, + "hi_bound": 6.6, "data_col": "fuel_mmbtu_per_unit", "weight_col": "fuel_consumed_units", }, @@ -1320,7 +1446,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_q": 0.05, "low_bound": 5.0, "hi_q": 0.95, - "hi_bound": 6.5, + "hi_bound": 6.6, "data_col": "fuel_mmbtu_per_unit", "weight_col": "fuel_consumed_units", }, @@ -1871,7 +1997,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_q": 0.05, "low_bound": 3.0, "hi_q": 0.95, - "hi_bound": 5.8, + "hi_bound": 5.9, "data_col": "heat_content_mmbtu_per_unit", "weight_col": "fuel_qty_units", }, @@ -2580,7 +2706,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_q": 0.05, "low_bound": 10.0, "hi_q": 0.95, - "hi_bound": 50.0, + "hi_bound": 55.0, "data_col": "fuel_cost_per_mwh", "weight_col": "net_generation_mwh", }, @@ -2624,7 +2750,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_q": 0.05, 
"low_bound": 1.75, "hi_q": 0.95, - "hi_bound": 6.0, + "hi_bound": 6.7, "data_col": "fuel_cost_per_mmbtu", "weight_col": "total_mmbtu", }, diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 442fa671f5..162df0ea04 100755 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -153,7 +153,7 @@ class ZenodoFetcher: "eia860m": "10.5281/zenodo.4540268", "eia861": "10.5281/zenodo.4127029", "eia923": "10.5281/zenodo.4127040", - "epacems": "10.5281/zenodo.4127055", + "epacems": "10.5281/zenodo.4660268", "ferc1": "10.5281/zenodo.4127044", "ferc714": "10.5281/zenodo.4127101", }, diff --git a/test/conftest.py b/test/conftest.py index c822e2ba7b..9a066f8741 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -266,6 +266,8 @@ def pudl_settings_dict(request, live_dbs, tmpdir_factory): # noqa: C901 "pudl_db"] pudl_settings["ferc1_db"] = pudl.workspace.setup.get_defaults()[ "ferc1_db"] + pudl_settings["censusdp1tract_db"] = pudl.workspace.setup.get_defaults()[ + "censusdp1tract_db"] logger.info("pudl_settings being used: %s", pudl_settings) return pudl_settings diff --git a/test/integration/etl_test.py b/test/integration/etl_test.py index e5149d9856..5312063438 100644 --- a/test/integration/etl_test.py +++ b/test/integration/etl_test.py @@ -10,6 +10,7 @@ import logging from pathlib import Path +import pytest import sqlalchemy as sa import yaml @@ -47,9 +48,13 @@ def test_epacems_to_parquet( datapkg_bundle, pudl_settings_fixture, pudl_etl_params, - request + request, + live_dbs, ): """Attempt to convert a small amount of EPA CEMS data to parquet format.""" + if live_dbs: + pytest.skip("Don't attempt EPA CEMS to Parquet conversion with live DBs.") + epacems_datapkg_json = Path( pudl_settings_fixture['datapkg_dir'], pudl_etl_params['datapkg_bundle_name'], @@ -203,19 +208,19 @@ def test_excel_filename_eia923(self, pudl_datastore_fixture): extractor = pudl.extract.eia923.Extractor(pudl_datastore_fixture) 
self.expected_file_name( extractor=extractor, - page='plant_frame', + page='generation_fuel', year=2009, expected_name="EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS" ) self.expected_file_name( extractor=extractor, - page='energy_storage', + page='fuel_receipts_costs', year=2019, expected_name="EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx" ) self.expected_file_name( extractor=extractor, - page='puerto_rico', + page='boiler_fuel', year=2012, expected_name="EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx" ) diff --git a/test/integration/fast_output_test.py b/test/integration/fast_output_test.py index 877de1b49e..aaf17ce621 100644 --- a/test/integration/fast_output_test.py +++ b/test/integration/fast_output_test.py @@ -7,6 +7,7 @@ import pytest import pudl +import pudl.validate as pv logger = logging.getLogger(__name__) @@ -20,57 +21,75 @@ def fast_out(pudl_engine, pudl_datastore_fixture): freq="MS", fill_fuel_cost=True, roll_fuel_cost=True, - fill_net_gen=True + fill_net_gen=False, ) -def test_fuel_ferc1(fast_out): - """Pull FERC 1 Fuel Data.""" - logger.info("Pulling a year's worth of FERC1 Fuel data.") - fuel_df = fast_out.fuel_ferc1() - logger.info(f"Pulled {len(fuel_df)} Fuel FERC1 records.") - - -def test_plants_steam_ferc1(fast_out): - """Pull FERC 1 Steam Plants.""" - logger.info("Pulling FERC1 Steam Plants") - steam_df = fast_out.plants_steam_ferc1() - logger.info(f"Pulled{len(steam_df)} FERC1 steam plants records.") - - -def test_fbp_ferc1(fast_out): - """Calculate fuel consumption by plant for FERC 1 for one year of data.""" - logger.info("Calculating FERC1 Fuel by Plant.") - fbp_df = fast_out.fbp_ferc1() - logger.info(f"Generated {len(fbp_df)} FERC1 fuel by plant records.") - - -def test_bga_eia860(fast_out): - """Pull original EIA 860 Boiler Generator Associations.""" - logger.info("Pulling the EIA 860 Boiler Generator Associations.") - bga_df = fast_out.bga_eia860() - logger.info(f"Generated {len(bga_df)} BGA EIA 860 records.") - - 
-def test_own_eia860(fast_out): - """Read EIA 860 generator ownership data.""" - logger.info("Pulling the EIA 860 ownership data.") - own_df = fast_out.own_eia860() - logger.info(f"Generated {len(own_df)} EIA 860 ownership records.") - - -def test_gf_eia923(fast_out): - """Read EIA 923 generator fuel data. (not used in MCOE).""" - logger.info("Pulling the EIA 923 generator fuel data.") - gf_df = fast_out.gf_eia923() - logger.info(f"Generated {len(gf_df)} EIA 923 generator fuel records.") - - -def test_mcoe(fast_out): - """Calculate MCOE.""" - logger.info("Calculating MCOE.") - mcoe_df = fast_out.mcoe() - logger.info(f"Generated {len(mcoe_df)} MCOE records.") +@pytest.mark.parametrize( + "df_name", [ + "fuel_ferc1", + "plants_steam_ferc1", + "fbp_ferc1", + "plant_in_service_ferc1", + "plants_hydro_ferc1", + "plants_pumped_storage_ferc1", + "plants_small_ferc1", + "pu_ferc1", + "purchased_power_ferc1", + ]) +def test_ferc1_outputs(fast_out, df_name): + """Check that FERC 1 output functions work.""" + logger.info(f"Running fast_out.{df_name}()") + df = fast_out.__getattribute__(df_name)() + logger.info(f"Found {len(df)} rows in {df_name}") + assert not df.empty + + +@pytest.mark.parametrize( + "df1_name,df2_name,mult,kwargs", [ + ("gens_eia860", "bga_eia860", 1 / 1, {}), + ("gens_eia860", "gens_eia860", 1 / 1, {}), + ("gens_eia860", "own_eia860", 1 / 1, {}), + ("gens_eia860", "plants_eia860", 1 / 1, {}), + ("gens_eia860", "pu_eia860", 1 / 1, {}), + ("gens_eia860", "utils_eia860", 1 / 1, {}), + + ("gens_eia860", "bf_eia923", 12 / 1, {}), + ("gens_eia860", "frc_eia923", 12 / 1, {}), + ("gens_eia860", "gen_eia923", 12 / 1, {}), + # gen_allocated_eia923 currently only produces annual results. 
+ ("gens_eia860", "gen_allocated_eia923", 1 / 1, {}), + ("gens_eia860", "gf_eia923", 12 / 1, {}), + + ("gens_eia860", "hr_by_unit", 12 / 1, {}), + ("gens_eia860", "hr_by_gen", 12 / 1, {}), + ("gens_eia860", "fuel_cost", 12 / 1, {}), + ("gens_eia860", "capacity_factor", 12 / 1, {}), + ("gens_eia860", "mcoe", 12 / 1, {"all_gens": False}), + ]) +def test_eia_outputs(fast_out, df1_name, df2_name, mult, kwargs): + """Check EIA output functions and date frequencies of output dataframes.""" + df1 = fast_out.__getattribute__(df1_name)() + logger.info(f"Running fast_out.{df2_name}() with freq={fast_out.freq}.") + df2 = fast_out.__getattribute__(df2_name)(**kwargs) + logger.info(f"Found {len(df2)} rows in {df2_name}") + logger.info(f"Checking {df2_name} date frequency relative to {df1_name}.") + pv.check_date_freq(df1, df2, mult) + + +@pytest.mark.parametrize( + "df_name,thresh", [ + ("mcoe", 0.9), + ] +) +def test_null_rows(fast_out, df_name, thresh): + """Check MCOE output for null rows resulting from bad merges.""" + # These are columns that only exist in earlier years + pv.no_null_rows( + df=fast_out.__getattribute__(df_name)(), + df_name=df_name, + thresh=thresh, + ) def test_eia861_etl(fast_out): @@ -91,24 +110,19 @@ def ferc714_out(fast_out, pudl_settings_fixture): ) -def test_ferc714_respondents_annualize(ferc714_out): - """Test annualized FERC 714 respondent outputs.""" - assert len(ferc714_out.annualize()) > 0 - - -def test_ferc714_respondents_categorize(ferc714_out): - """Test categorized FERC 714 respondent outputs.""" - assert len(ferc714_out.categorize()) > 0 - - -def test_ferc714_respondents_summarized(ferc714_out): - """Test summarized FERC 714 demand outputs.""" - assert len(ferc714_out.summarize_demand()) > 0 - - -def test_ferc714_respondents_fipsified(ferc714_out): - """Test FERC 714 respondent county FIPS associations.""" - assert len(ferc714_out.fipsify()) > 0 +@pytest.mark.parametrize( + "df_name", [ + "annualize", + "categorize", + "summarize_demand", 
+ "fipsify", + ]) +def test_ferc714_outputs(ferc714_out, df_name): + """Test FERC 714 derived output methods.""" + logger.info(f"Running ferc714_out.{df_name}()") + df = ferc714_out.__getattribute__(df_name)() + logger.info(f"Found {len(df)} rows in {df_name}") + assert not df.empty @pytest.mark.xfail( diff --git a/test/unit/analysis/allocate_net_gen.py b/test/unit/analysis/allocate_net_gen.py new file mode 100644 index 0000000000..6a78fb3809 --- /dev/null +++ b/test/unit/analysis/allocate_net_gen.py @@ -0,0 +1,119 @@ +"""Unit tests for allocation of net generation.""" + +import numpy as np +import pandas as pd + +from pudl import helpers +from pudl.analysis import allocate_net_gen + +# Reusable input files... + +# inputs for example 1: +# multi-generator-plant with one primary fuel type that fully reports to the +# generation_eia923 table +GEN_1 = pd.DataFrame({ + 'plant_id_eia': [50307, 50307, 50307, 50307], + 'generator_id': ['GEN1', 'GEN2', 'GEN3', 'GEN4'], + 'report_date': ['2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', ], + 'net_generation_mwh': [14.0, 1.0, 0.0, 0.0], +} +) + +GF_1 = pd.DataFrame({ + 'plant_id_eia': [50307, 50307, 50307, 50307], + 'prime_mover_code': ['ST', 'IC', 'IC', 'ST'], + 'fuel_type': ['NG', 'DFO', 'RFO', 'RFO'], + 'report_date': ['2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', ], + 'net_generation_mwh': [15.0, 0.0, np.nan, np.nan], + 'fuel_consumed_mmbtu': [100000.0, 0.0, np.nan, np.nan], +}, +) + +GENS_1 = pd.DataFrame({ + 'plant_id_eia': [50307, 50307, 50307, 50307, 50307], + 'generator_id': ['GEN1', 'GEN2', 'GEN3', 'GEN4', 'GEN5'], + 'report_date': ['2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', ], + 'prime_mover_code': ['ST', 'ST', 'ST', 'ST', 'IC'], + 'capacity_mw': [7.5, 2.5, 2.5, 4.3, 1.8], + 'fuel_type_count': [2, 2, 2, 2, 2], + 'energy_source_code_1': ['NG', 'NG', 'NG', 'NG', 'DFO'], + 'energy_source_code_2': [None, None, None, None, None], + 'energy_source_code_3': [None, None, None, 
None, None], + 'energy_source_code_4': [None, None, None, None, None], + 'energy_source_code_5': [None, None, None, None, None], + 'energy_source_code_6': [None, None, None, None, None], + 'planned_energy_source_code_1': [None, None, None, None, None], +}, +) + + +def test__associate_generator_tables_1(): + """Test associate_generator_tables function with example 1.""" + gen_assoc_1_expected = pd.DataFrame({ + 'plant_id_eia': [50307, 50307, 50307, 50307, 50307, 50307, 50307], + 'generator_id': ['GEN1', 'GEN2', 'GEN3', 'GEN4', 'GEN5', np.nan, np.nan], + 'report_date': ['2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', ], + 'prime_mover_code': ['ST', 'ST', 'ST', 'ST', 'IC', 'IC', 'ST'], + 'capacity_mw': [7.5, 2.5, 2.5, 4.3, 1.8, np.nan, np.nan], + 'fuel_type_count': [2.0, 2.0, 2.0, 2.0, 2.0, np.nan, np.nan], + 'energy_source_code_num': ['energy_source_code_1', 'energy_source_code_1', 'energy_source_code_1', 'energy_source_code_1', 'energy_source_code_1', np.nan, np.nan], + 'fuel_type': ['NG', 'NG', 'NG', 'NG', 'DFO', 'RFO', 'RFO'], + 'net_generation_mwh_g_tbl': [14.0, 1.0, 0.0, 0.0, np.nan, np.nan, np.nan], + 'net_generation_mwh_gf_tbl': [15.0, 15.0, 15.0, 15.0, 0.0, np.nan, np.nan], + 'fuel_consumed_mmbtu': [100000.0, 100000.0, 100000.0, 100000.0, 0.0, np.nan, np.nan], + 'capacity_mw_fuel': [16.8, 16.8, 16.8, 16.8, 1.8, np.nan, np.nan], + 'net_generation_mwh_g_tbl_fuel': [15.0, 15.0, 15.0, 15.0, np.nan, np.nan, np.nan], + }, + ).pipe(helpers.convert_cols_dtypes, 'eia') + + gen_assoc_1_actual = ( + allocate_net_gen.associate_generator_tables( + gf=GF_1, gen=GEN_1, gens=GENS_1) + .pipe(helpers.convert_cols_dtypes, 'eia') + ) + + pd.testing.assert_frame_equal(gen_assoc_1_expected, gen_assoc_1_actual) + + +def test__allocate_gen_fuel_by_gen_pm_fuel_1(): + """Test allocate_gen_fuel_by_gen_pm_fuel function with example 1.""" + gen_pm_fuel_1_expected = pd.DataFrame( + { + 'plant_id_eia': [50307, 50307, 50307, 50307, 50307], + 
'prime_mover_code': ['ST', 'ST', 'ST', 'ST', 'IC'], + 'fuel_type': ['NG', 'NG', 'NG', 'NG', 'DFO'], + 'report_date': ['2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', ], + 'generator_id': ['GEN1', 'GEN2', 'GEN3', 'GEN4', 'GEN5'], + 'frac': [0.93, 0.066, 0.0, 0.0, 1.0], + 'net_generation_mwh_gf_tbl': [15.0, 15.0, 15.0, 15.0, 0.0], + 'net_generation_mwh_g_tbl': [14.0, 1.0, 0.0, 0.0, 0.0], + 'capacity_mw': [7.5, 2.5, 2.5, 4.3, 1.8], + 'fuel_consumed_mmbtu': [93333.33, 6666.66, 0.0, 0.0, 0.0], + 'net_generation_mwh': [14.0, 1.0, 0.0, 0.0, 0.0], + 'fuel_consumed_mmbtu_gf_tbl': [100000.0, 100000.0, 100000.0, 100000.0, 0.0], + }, + + ).pipe(helpers.convert_cols_dtypes, 'eia') + + gen_pm_fuel_1_actual = allocate_net_gen.allocate_gen_fuel_by_gen_pm_fuel( + gf=GF_1, gen=GEN_1, gens=GENS_1 + ) + + pd.testing.assert_frame_equal(gen_pm_fuel_1_expected, gen_pm_fuel_1_actual) + + # gen_pm_fuel_1_expected is an inputs into agg_by_generator().. so should I + # test this here?? + # + # testing the aggregation to the generator level for example 1. + # in this case, each generator has one prime mover and one fuel source so + # they are effectively the same. 
+ gen_out_1_expected = pd.DataFrame({ + 'plant_id_eia': [50307, 50307, 50307, 50307, 50307], + 'generator_id': ['GEN1', 'GEN2', 'GEN3', 'GEN4', 'GEN5'], + 'report_date': ['2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01', ], + 'net_generation_mwh': [14.0, 1.0, 0.0, 0.0, 0.0], + 'fuel_consumed_mmbtu': [93333.33, 6666.66, 0.0, 0.0, 0.0], + }, + ) + gen_out_1_actual = allocate_net_gen.agg_by_generator(gen_pm_fuel_1_actual) + pd.testing.assert_frame_equal(gen_out_1_expected, gen_out_1_actual) diff --git a/test/unit/dfc_test.py b/test/unit/dfc_test.py index 7fb6c22290..46cc9d33e8 100644 --- a/test/unit/dfc_test.py +++ b/test/unit/dfc_test.py @@ -38,19 +38,20 @@ def test_get_throws_key_error(self): self.assertRaises(KeyError, self.dfc.get, "unknown_df") def test_table_conflicts_are_prevented(self): - """Checks that subsequent calls to store throw TableExists exceptions.""" + """Checks that subsequent calls to store throw TableExistsError exceptions.""" test_df = pd.DataFrame({'x': [1, 2]}) self.dfc.store("test", test_df) - self.assertRaises(dfc.TableExists, self.dfc.store, "test", pd.DataFrame()) + self.assertRaises(dfc.TableExistsError, self.dfc.store, "test", pd.DataFrame()) pd.testing.assert_frame_equal(test_df, self.dfc.get("test")) def test_disk_overwrites_are_prevented(self): - """Checks that TableExists is thrown if the file already exists on disk.""" + """Checks that TableExistsError is thrown if the file already exists on disk.""" self.dfc.store("first", pd.DataFrame()) instance_id = dict(self.dfc.references())["first"] with open(os.path.join(self.temp_dir, instance_id.hex, "second"), "w") as f: f.write("random content") - self.assertRaises(dfc.TableExists, self.dfc.store, "second", pd.DataFrame()) + self.assertRaises(dfc.TableExistsError, self.dfc.store, + "second", pd.DataFrame()) def test_simple_df_retrieval(self): """Adds single dataframe to DFC and retrieves it.""" @@ -86,9 +87,10 @@ def test_add_reference(self): 
self.assertEqual(self.dfc.references(), extra_dfc.references()) def test_add_reference_prevents_collisions(self): - """Tests that attempts to add_reference to existing table throw TableExists.""" + """Tests that attempts to add_reference to existing table throw TableExistsError.""" self.dfc.store("test", pd.DataFrame()) - self.assertRaises(dfc.TableExists, self.dfc.add_reference, "test", uuid.uuid1()) + self.assertRaises(dfc.TableExistsError, + self.dfc.add_reference, "test", uuid.uuid1()) def test_two_dfc_do_not_collide(self): """Test that two dfc storing the same df will not collide.""" @@ -130,7 +132,7 @@ def test_overlapping_union_disallowed(self): dfc1.store("y", pd.DataFrame()) dfc2.store("y", pd.DataFrame()) dfc2.store("z", pd.DataFrame()) - self.assertRaises(dfc.TableExists, dfc1.union, dfc2) + self.assertRaises(dfc.TableExistsError, dfc1.union, dfc2) def test_to_dict(self): """Tests that dictionaries containing dataframes are correctly built from DFCs.""" diff --git a/test/unit/transform/__init__.py b/test/unit/transform/__init__.py new file mode 100644 index 0000000000..7c7668c15b --- /dev/null +++ b/test/unit/transform/__init__.py @@ -0,0 +1 @@ +"""Unit tests for the pudl.transform subpackage.""" diff --git a/test/unit/transform/eia923_test.py b/test/unit/transform/eia923_test.py new file mode 100644 index 0000000000..2325be16bd --- /dev/null +++ b/test/unit/transform/eia923_test.py @@ -0,0 +1,58 @@ +"""Unit tests for the pudl.transform.eia923 module.""" + +import pandas as pd + +import pudl.transform.eia923 as eia923 + + +def test__yearly_to_monthly_records__normal_values(): + """Test that monthly columns are reshaped to rows. 
+ + input: + idx other_col value_january value_june + 100 0 1 2 + 101 3 4 5 + + output: + idx other_col report_month value + 100 0 1 1 + 100 0 6 2 + 101 3 1 4 + 101 3 6 5 + """ + test_df = pd.DataFrame([[0, 1, 2], + [3, 4, 5]], + columns=['other_col', 'value_january', 'value_june'], + index=[100, 101] + ) + actual = eia923._yearly_to_monthly_records(test_df) + expected = pd.DataFrame([[0, 1, 1], + [0, 6, 2], + [3, 1, 4], + [3, 6, 5]], + columns=['other_col', 'report_month', 'value'], + index=[100, 100, 101, 101] + ) + pd.testing.assert_frame_equal(expected, actual) + + +def test__yearly_to_monthly_records__empty_frame(): + """Test that empty dataframes still have correct column names. + + input: + idx other_col value_january value_june + + + output: + idx other_col report_month value + + """ + # empty dfs initialize with Index by default, so need to specify RangeIndex + test_df = pd.DataFrame([], columns=['other_col', 'value_january', 'value_june'], + index=pd.RangeIndex(start=0, stop=0, step=1)) + actual = eia923._yearly_to_monthly_records(test_df) + expected = pd.DataFrame([], columns=['other_col', 'report_month', 'value'], + index=pd.RangeIndex(start=0, stop=0, step=1)) + # report_month dtype changes from object to int64 + # but only because they are empty and get sent to default types during df.stack() + pd.testing.assert_frame_equal(expected, actual, check_dtype=False) diff --git a/test/unit/workspace/datastore_test.py b/test/unit/workspace/datastore_test.py index 9806ff5207..f00a31cbc1 100644 --- a/test/unit/workspace/datastore_test.py +++ b/test/unit/workspace/datastore_test.py @@ -114,8 +114,8 @@ class TestZenodoFetcher(unittest.TestCase): "hash": "6f1ed002ab5595859014ebf0951522d9"}, ] } - PROD_EPACEMS_DOI = "10.5281/zenodo.4127055" - PROD_EPACEMS_ZEN_ID = 4127055 # This is the last numeric part of doi + PROD_EPACEMS_DOI = "10.5281/zenodo.4660268" + PROD_EPACEMS_ZEN_ID = 4660268 # This is the last numeric part of doi def setUp(self): """Constructs instance 
of mockable zenodo fetcher based on the MOCK_EPACEMS_DATAPACKAGE.""" diff --git a/test/validate/eia860_test.py b/test/validate/eia860_test.py index 53e51b386d..4270bb3704 100644 --- a/test/validate/eia860_test.py +++ b/test/validate/eia860_test.py @@ -1,6 +1,7 @@ """Validate post-ETL EIA 860 data and the associated derived outputs.""" import logging +import pandas as pd import pytest from scipy import stats @@ -9,11 +10,46 @@ logger = logging.getLogger(__name__) +def test_bga_eia860(pudl_out_eia, live_dbs): + """Test the boiler generator associations.""" + if not live_dbs: + pytest.skip("Data validation only works with a live PUDL DB.") + logger.info("Inferring complete boiler-generator associations...") + bga = pudl_out_eia.bga_eia860() + gens_simple = pudl_out_eia.gens_eia860()[['report_date', + 'plant_id_eia', + 'generator_id', + 'fuel_type_code_pudl']] + bga_gens = bga[['report_date', 'plant_id_eia', + 'unit_id_pudl', 'generator_id']].drop_duplicates() + + gens_simple = pd.merge(gens_simple, bga_gens, + on=['report_date', 'plant_id_eia', 'generator_id'], + validate='one_to_one') + units_simple = gens_simple.drop('generator_id', axis=1).drop_duplicates() + units_fuel_count = \ + units_simple.groupby( + ['report_date', + 'plant_id_eia', + 'unit_id_pudl'])['fuel_type_code_pudl'].count().reset_index() + units_fuel_count.rename( + columns={'fuel_type_code_pudl': 'fuel_type_count'}, inplace=True) + units_simple = pd.merge(units_simple, units_fuel_count, + on=['report_date', 'plant_id_eia', 'unit_id_pudl']) + num_multi_fuel_units = len(units_simple[units_simple.fuel_type_count > 1]) + multi_fuel_unit_fraction = num_multi_fuel_units / len(units_simple) + logger.warning( + f"{multi_fuel_unit_fraction:.0%} of generation units contain " + f"generators with differing primary fuels.") + + def test_own_eia860(pudl_out_eia, live_dbs): """Sanity checks for EIA 860 generator ownership data.""" if not live_dbs: pytest.skip("Data validation only works with a live PUDL DB.") - 
logger.info('Reading EIA 860 generator ownership data...') + if pudl_out_eia.freq is not None: + pytest.skip() + own_out = pudl_out_eia.own_eia860() if (own_out.fraction_owned > 1.0).any(): @@ -48,3 +84,117 @@ def test_own_eia860(pudl_out_eia, live_dbs): raise ValueError( f"{pct_missing}% of generators lack complete ownership data." ) + + +@pytest.mark.xfail(reason="There are 730 generators that report multiple operators") +def test_unique_operator_id(pudl_out_eia, live_dbs): + """ + Test that each generator in the ownership table has a unique operator ID. + + The ``utility_id_eia`` column is supposed to be the operator, which should only be + one utility for each generator in each report year, while ``owner_utility_id_eia`` + is supposed to represent the owner, of which there can be several for each + generator. We have a known issue with 2010 data. Many generators are being reported + with multiple ``utility_id_eia`` values, which appear to actually be the + ``owner_utility_id_eia``. + + Raises: + AssertionError: If there are generators with multiple reported operators + + """ + if not live_dbs: + pytest.skip("Data validation only works with a live PUDL DB.") + if pudl_out_eia.freq is not None: + pytest.skip() + + own_out = pudl_out_eia.own_eia860() + operator_check = ( + own_out.groupby( + ['report_date', 'plant_id_eia', 'generator_id'], + dropna=True) + [['utility_id_eia']].nunique() + ) + multi_operator = operator_check[operator_check.utility_id_eia > 1] + years = multi_operator.report_date.dt.year.unique() + if not multi_operator.empty: + raise AssertionError( + f"There are {len(multi_operator)} generator records across " + f"{list(years)} that are being reported as having multiple " + "operators." + ) + + +@pytest.mark.xfail(reason="There are 40 known inconsistent generator IDs.") +def test_generator_id_consistency(pudl_out_eia, live_dbs): + """ + Check if there are any plants that report inconsistent generator IDs. 
+ + There are some instances in which a plant will report generator IDs + differently in different years, such that the IDs differ only in terms of + the case of letters, or non-alphanumeric characters. This test identifies + them. We haven't fixed them yet. + + """ + if not live_dbs: + pytest.skip("Data validation only works with a live PUDL DB.") + if pudl_out_eia.freq is not None: + pytest.skip() + + # All unique plant+generator ID combinations: + gen_ids = ( + pudl_out_eia.gens_eia860()[["plant_id_eia", "generator_id"]] + .drop_duplicates() + ) + # A simple generator_id w/o non-alphanumeric characters or lower-case letters: + gen_ids["simple_id"] = ( + gen_ids.generator_id.str.upper() + .str.replace(r"[^a-zA-Z0-9]", "", regex=True) + ) + + # Identify the set of simple IDs which map to multiple original generator IDs, + # meaning that within a single plant, there are generators whose original IDs + # only differ by non-alphanumeric characters, or by the case of the letters: + multiple_ids = ( + gen_ids.groupby(["plant_id_eia", "simple_id"])["generator_id"] + .nunique() > 1 + ) + + # Select only those generator IDs that have multiple values: + problem_generators = ( + gen_ids.set_index(["plant_id_eia", "simple_id"]) + .loc[multiple_ids] + .sort_index() + ) + if not problem_generators.empty: + errstr = f"Found {len(problem_generators)} ambiguous generator IDs." 
+ raise ValueError(errstr) + + +def test_nuclear_units_are_generators(pudl_out_eia, live_dbs): + """Validate that all nuclear Unit IDs correspond to generator IDs.""" + if not live_dbs: + pytest.skip("Data validation only works with a live PUDL DB.") + if pudl_out_eia.freq is not None: + pytest.skip() + + all_nuke_gens = ( + pudl_out_eia.gens_eia860()[[ + "plant_id_eia", + "generator_id", + "energy_source_code_1", + ]] + .query("energy_source_code_1 == 'NUC'") + .set_index(["plant_id_eia", "generator_id"]) + ) + + all_nuke_gf = ( + pudl_out_eia.gf_eia923()[[ + "plant_id_eia", + "nuclear_unit_id", + "fuel_type", + ]] + .query("fuel_type == 'NUC'") + .assign(generator_id=lambda x: x.nuclear_unit_id.astype(int).astype(str)) + .set_index(["plant_id_eia", "generator_id"]) + ) + assert set(all_nuke_gf.index).issubset(all_nuke_gens.index) diff --git a/test/validate/eia_test.py b/test/validate/eia_test.py index 03300d563a..35a1b321a4 100644 --- a/test/validate/eia_test.py +++ b/test/validate/eia_test.py @@ -32,23 +32,25 @@ def test_no_null_cols_eia(pudl_out_eia, live_dbs, cols, df_name): @pytest.mark.parametrize( "df_name,raw_rows,monthly_rows,annual_rows", [ - ("utils_eia860", 94_895, 94_895, 94_895), - ("plants_eia860", 140_311, 140_311, 140_311), - ("pu_eia860", 139_443, 139_443, 139_443), - ("own_eia860", 65_271, 65_271, 65_271), - ("bga_eia860", 103_805, 103_805, 103_805), - ("gens_eia860", 404_158, 404_158, 404_158), - ("frc_eia923", 454_891, 190_115, 18_773), - ("gen_eia923", 476_052, 476_052, 39_671), - ("bf_eia923", 1_133_388, 946_056, 78_838), - ("gf_eia923", 1_551_264, 1_250_340, 104_195), + ("utils_eia860", 94_896, 94_896, 94_896), + ("plants_eia860", 155_045, 155_045, 155_045), + ("pu_eia860", 139_444, 139_444, 139_444), + ("own_eia860", 65_264, 65_264, 65_264), + ("bga_eia860", 105_764, 105_764, 105_764), + ("gens_eia860", 403_834, 403_834, 403_834), + ("frc_eia923", 517_078, 213_563, 21_338), + ("gen_eia923", 510_835, 510_835, 42_884), + ("bf_eia923", 
1_207_976, 1_196_908, 100_866), + ("gf_eia923", 2_109_040, 2_099_362, 176_618), ]) -def test_minmax_rows(pudl_out_eia, - live_dbs, - raw_rows, - annual_rows, - monthly_rows, - df_name): +def test_minmax_rows( + pudl_out_eia, + live_dbs, + raw_rows, + annual_rows, + monthly_rows, + df_name +): """Verify that output DataFrames don't have too many or too few rows. Args: @@ -74,9 +76,9 @@ def test_minmax_rows(pudl_out_eia, _ = ( pudl_out_eia.__getattribute__(df_name)() .pipe(pv.check_min_rows, expected_rows=expected_rows, - margin=0.05, df_name=df_name) + margin=0.0, df_name=df_name) .pipe(pv.check_max_rows, expected_rows=expected_rows, - margin=0.05, df_name=df_name) + margin=0.0, df_name=df_name) ) @@ -86,14 +88,9 @@ def test_minmax_rows(pudl_out_eia, ("utils_eia860", ["report_date", "utility_id_eia"]), ("pu_eia860", ["report_date", "plant_id_eia"]), ("gens_eia860", ["report_date", "plant_id_eia", "generator_id"]), - ("bga_eia860", ["report_date", - "plant_id_eia", - "boiler_id", - "generator_id"]), - ("own_eia860", ["report_date", - "plant_id_eia", - "generator_id", - "owner_utility_id_eia"]), + ("bga_eia860", ["report_date", "plant_id_eia", "boiler_id", "generator_id"]), + ("own_eia860", ["report_date", "plant_id_eia", + "generator_id", "owner_utility_id_eia"]), ("gen_eia923", ["report_date", "plant_id_eia", "generator_id"]), ]) def test_unique_rows_eia(pudl_out_eia, live_dbs, unique_subset, df_name): diff --git a/test/validate/ferc1_test.py b/test/validate/ferc1_test.py index f98af179fd..691eb1c2c7 100644 --- a/test/validate/ferc1_test.py +++ b/test/validate/ferc1_test.py @@ -69,7 +69,7 @@ def test_no_null_cols_ferc1(pudl_out_ferc1, live_dbs, cols, df_name): ("plants_pumped_storage_ferc1", 689), ("plant_in_service_ferc1", 25_931), ("purchased_power_ferc1", 183_583), - ("pu_ferc1", 6797), + ("pu_ferc1", 6798), ]) def test_minmax_rows(pudl_out_ferc1, live_dbs, expected_rows, df_name): """Verify that output DataFrames don't have too many or too few rows. 
@@ -89,9 +89,9 @@ def test_minmax_rows(pudl_out_ferc1, live_dbs, expected_rows, df_name): _ = ( pudl_out_ferc1.__getattribute__(df_name)() .pipe(pv.check_min_rows, expected_rows=expected_rows, - margin=0.02, df_name=df_name) + margin=0.0, df_name=df_name) .pipe(pv.check_max_rows, expected_rows=expected_rows, - margin=0.02, df_name=df_name) + margin=0.0, df_name=df_name) ) diff --git a/test/validate/mcoe_test.py b/test/validate/mcoe_test.py index 270c474af2..34c62bb470 100644 --- a/test/validate/mcoe_test.py +++ b/test/validate/mcoe_test.py @@ -12,7 +12,6 @@ """ import logging -import pandas as pd import pytest from pudl import validate as pv @@ -41,88 +40,95 @@ def pudl_out_mcoe(pudl_out_eia, live_dbs): min_heat_rate=None, min_fuel_cost_per_mwh=None, min_cap_fact=None, - max_cap_fact=None + max_cap_fact=None, + all_gens=False, ) return pudl_out_eia -def test_bga(pudl_out_eia, live_dbs): - """Test the boiler generator associations.""" - if not live_dbs: - pytest.skip("Data validation only works with a live PUDL DB.") - logger.info("Inferring complete boiler-generator associations...") - bga = pudl_out_eia.bga() - gens_simple = pudl_out_eia.gens_eia860()[['report_date', - 'plant_id_eia', - 'generator_id', - 'fuel_type_code_pudl']] - bga_gens = bga[['report_date', 'plant_id_eia', - 'unit_id_pudl', 'generator_id']].drop_duplicates() - - gens_simple = pd.merge(gens_simple, bga_gens, - on=['report_date', 'plant_id_eia', 'generator_id'], - validate='one_to_one') - units_simple = gens_simple.drop('generator_id', axis=1).drop_duplicates() - units_fuel_count = \ - units_simple.groupby( - ['report_date', - 'plant_id_eia', - 'unit_id_pudl'])['fuel_type_code_pudl'].count().reset_index() - units_fuel_count.rename( - columns={'fuel_type_code_pudl': 'fuel_type_count'}, inplace=True) - units_simple = pd.merge(units_simple, units_fuel_count, - on=['report_date', 'plant_id_eia', 'unit_id_pudl']) - num_multi_fuel_units = len(units_simple[units_simple.fuel_type_count > 1]) - 
multi_fuel_unit_fraction = num_multi_fuel_units / len(units_simple) - logger.warning( - f"{multi_fuel_unit_fraction:.0%} of generation units contain " - f"generators with differing primary fuels.") - - ############################################################################### # Tests that look check for existence and uniqueness of some MCOE outputs: # These tests were inspired by some bad merges that generated multiple copies # of some records in the past... ############################################################################### @pytest.mark.parametrize( - "df_name,cols", [ - ("hr_by_unit", "all"), - ("hr_by_gen", "all"), - ("fuel_cost", "all"), - ("capacity_factor", "all"), - ("bga", "all"), - ("mcoe", "all"), - ] + "df_name", + ["hr_by_unit", "hr_by_gen", "fuel_cost", "capacity_factor", "mcoe"] ) -def test_no_null_cols_mcoe(pudl_out_mcoe, live_dbs, cols, df_name): +def test_no_null_cols_mcoe(pudl_out_mcoe, live_dbs, df_name): """Verify that output DataFrames have no entirely NULL columns.""" if not live_dbs: pytest.skip("Data validation only works with a live PUDL DB.") if pudl_out_mcoe.freq is None: pytest.skip() - pv.no_null_cols( - pudl_out_mcoe.__getattribute__(df_name)(), - cols=cols, df_name=df_name) + # These are columns that only exist in 2006 and older data, beyond the time + # for which we can calculate the MCOE: + deprecated_cols = [ + 'distributed_generation', + 'energy_source_1_transport_1', + 'energy_source_1_transport_2', + 'energy_source_1_transport_3', + 'energy_source_2_transport_1', + 'energy_source_2_transport_2', + 'energy_source_2_transport_3', + 'owned_by_non_utility', + 'reactive_power_output_mvar', + 'summer_capacity_estimate', + 'winter_capacity_estimate' + ] + df = pudl_out_mcoe.__getattribute__(df_name)() + cols = [col for col in df.columns if col not in deprecated_cols] + pv.no_null_cols(df, cols=cols, df_name=df_name) + + +@pytest.mark.parametrize( + "df_name,thresh", [ + ("mcoe", 0.8), + ] +) +def 
test_no_null_rows_mcoe(pudl_out_mcoe, live_dbs, df_name, thresh): + """ + Verify that output DataFrames have no overly NULL rows. + + Currently we only test the MCOE dataframe because it has lots of columns + and some complicated merges. For tables with fewer columns, the "index" + columns end up being most of them, and should probably be skipped. + + """ + if not live_dbs: + pytest.skip("Data validation only works with a live PUDL DB.") + if pudl_out_mcoe.freq is None: + pytest.skip() + + pv.no_null_rows( + df=pudl_out_mcoe.__getattribute__(df_name)(), + df_name=df_name, + thresh=thresh, + ) @pytest.mark.parametrize( "df_name,monthly_rows,annual_rows", [ - ("bga", 103_805, 103_805), - ("hr_by_unit", 302_256, 25_188), - ("hr_by_gen", 451_668, 37_639), - ("fuel_cost", 451_668, 37_639), - ("capacity_factor", 476_052, 39_671), - ("mcoe", 476_052, 39_671), + ("hr_by_unit", 315_324, 26_570), + ("hr_by_gen", 468_358, 39_323), + ("fuel_cost", 468_353, 39_318), + ("capacity_factor", 510_835, 42_884), + ("mcoe", 510_880, 42_884), ]) -def test_minmax_rows_mcoe(pudl_out_mcoe, live_dbs, - monthly_rows, annual_rows, df_name): +def test_minmax_rows_mcoe( + pudl_out_mcoe, + live_dbs, + monthly_rows, + annual_rows, + df_name +): """Verify that output DataFrames don't have too many or too few rows.""" if not live_dbs: pytest.skip("Data validation only works with a live PUDL DB.") if pudl_out_mcoe.freq is None: pytest.skip() - if (pudl_out_mcoe.freq == "MS"): + if pudl_out_mcoe.freq == "MS": expected_rows = monthly_rows else: assert pudl_out_mcoe.freq == "AS" @@ -130,15 +136,14 @@ def test_minmax_rows_mcoe(pudl_out_mcoe, live_dbs, _ = ( pudl_out_mcoe.__getattribute__(df_name)() .pipe(pv.check_min_rows, expected_rows=expected_rows, - margin=0.05, df_name=df_name) + margin=0.0, df_name=df_name) .pipe(pv.check_max_rows, expected_rows=expected_rows, - margin=0.05, df_name=df_name) + margin=0.0, df_name=df_name) ) @pytest.mark.parametrize( "df_name,unique_subset", [ - ("bga", 
["report_date", "plant_id_eia", "boiler_id", "generator_id"]), ("hr_by_unit", ["report_date", "plant_id_eia", "unit_id_pudl"]), ("hr_by_gen", ["report_date", "plant_id_eia", "generator_id"]), ("fuel_cost", ["report_date", "plant_id_eia", "generator_id"]), diff --git a/tox.ini b/tox.ini index 464765bf1a..b6223b6d4f 100644 --- a/tox.ini +++ b/tox.ini @@ -93,10 +93,10 @@ extras = doc commands = bash -c 'rm -rf docs/_build docs/api/*.rst' - bash -c 'rm -rf docs/data_dictionary.rst' + bash -c 'rm -rf docs/data_dictionaries/pudl_db.rst' datapkg_to_rst \ -i src/pudl/package_data/meta/datapkg/datapackage.json \ - -o docs/data_dictionary.rst \ + -o docs/data_dictionaries/pudl_db.rst \ --ignore datasets accumulated_depreciation_ferc1 sphinx-apidoc \ --separate \ @@ -247,11 +247,11 @@ commands = ########################################################################### [pytest] testpaths = . -addopts = --capture=tee-sys --verbose -log_cli = true -log_cli_level = info +addopts = --verbose log_format = %(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s log_date_format= %Y-%m-%d %H:%M:%S +log_cli = true +log_cli_level = info doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS [flake8]