Merge pull request #1203 from blue-marble/develop

GridPath v2025.1.0
blue-marble · Jan 17, 2025 · 4be194a · 4be194a
2 parents 6ee5379 + 1700054
commit 4be194a
Show file tree

Hide file tree

Showing 2,206 changed files with 1,335,870 additions and 239,945 deletions.
diff --git a/.github/workflows/test_gridpath.yml b/.github/workflows/test_gridpath.yml
@@ -22,6 +22,9 @@ jobs:
 
     steps:
     - uses: actions/checkout@v3
+    - name: Install sqlite3 3.45.0
+      run: |
+        bash ./.github/workflows/upgrade_sqlite_on_linux.sh
     - name: Set up Python
       uses: actions/setup-python@v3
       with:

diff --git a/.github/workflows/upgrade_sqlite_on_linux.sh b/.github/workflows/upgrade_sqlite_on_linux.sh
@@ -0,0 +1,14 @@
+# required to support UNIXEPOCH
+# installing build: 3.45.0
+wget https://www.sqlite.org/2024/sqlite-autoconf-3450000.tar.gz
+# unzipping build
+tar -xvzf sqlite-autoconf-3450000.tar.gz
+
+# below steps are for installing the build in /usr/local/bin
+cd sqlite-autoconf-3450000 || exit
+./configure
+make
+sudo make install
+
+# remove the previous version
+sudo apt-get remove -y --auto-remove sqlite3
diff --git a/.gitignore b/.gitignore
@@ -95,14 +95,10 @@ ENV/
 .idea
 
 # Don't track example run CSV results
-# Cap-expansion run
 examples/*/results/*.csv
-# Multi-horizon, single-stage prod cost run
 examples/*/*/results/*.csv
-# Multi-horizon, multi-stage prod cost run
 examples/*/*/*/results/*.csv
-# RA iteration runs
-# Weather/hydro/availability iterations + subproblems
+examples/*/*/*/*/results/*.csv
 examples/*/*/*/*/*/results/*.csv
 
 # Don't track pass-through inputs directory in multi-stage runs

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,3 +1,4 @@
 include gridpath/project/operations/operational_types/opchar_param_requirements.csv
 include db/db_schema.sql
 include db/data/*.*
+include gridpath_data_toolkit/raw_data_db_schema.sql
diff --git a/data_toolkit/README.md b/data_toolkit/README.md
@@ -0,0 +1,68 @@
+## GridPath Data Toolkit
+
+This is a pre-release of the GridPath Data Toolkit. The Toolkit includes 
+previously available functionality from the GridPath RA Data Toolkit that 
+generates GridPath input CSV files for use in resource adequacy studies, 
+including weather-dependent load profiles as well as wind and solar profiles,
+generator availabilities, and hydro conditions. New functionality takes 
+advantage of the public data available in the PUDL database maintained by 
+Catalyst Cooperative.
+
+GridPath can currently utilize the following open datasets available from PUDL:
+* **Form EIA-860**: generator-specific information about existing and 
+  planned generators
+* **Form EIA-930**: hourly operating data about the high-voltage bulk electric 
+  power grid in the Lower 48 states collected from the electricity balancing authorities (BAs) that operate the grid
+* **EIA AEO** *Table 54 (Electric Power Projections by Electricity Market 
+  Module Region)*: fuel price forecasts
+* **GridPath RA Toolkit** variable generation profiles created for the 2026 
+  Western RA Study: these include hourly wind profiles by WECC BA based on 
+  assumed 2026 wind buildout for weather years 2007-2014 and hourly solar 
+  profiles by WECC BA based on assumed 2026 buildout (as of 2021) for weather 
+  years 1998-2019
+
+## Usage
+### Download data from PUDL
+
+```bash
+gridpath_get_pudl_data
+```
+Downloads data to *./pudl_download* by default.
+This will download the *pudl.sqlite* database as well as the RA Toolkit 
+wind and solar profiles Parquet file, and the EIA930 hourly interchange 
+data Parquet file. See *--help* menu for options. Note these are relatively 
+large files and the download process may take a few minutes depending on 
+your internet speed.
+
+### Get subset of raw data for GridPath from downloaded PUDL data
+
+```bash
+gridpath_pudl_to_gridpath_raw
+```
+Gets subset of the downloaded PUDL data and converts it to GridPath raw data format.
+This will create the following files in the user-specified raw data directory:
+* pudl_eia860_generators.csv
+* pudl_eia930_hourly_interchange.csv
+* pudl_eiaaeo_fuel_prices.csv
+* pudl_ra_toolkit_var_profiles.csv
+
+### Get other GridPath RA Toolkit data not yet on PUDL
+
+```bash
+gridpath_get_ra_toolkit_data_raw
+
+```
+Gets the load and hydro data from the GridPath RA Toolkit dataset. 
+Note that this is the same dataset as the one on the GridLab RA Toolkit 
+website, but in a different format; it is currently stored on Blue Marble's 
+Google Drive.
+* ra_toolkit_load.csv
+* ra_toolkit_hydro.csv
+
+
+### Process the data with the GridPath Data Toolkit
+
+```bash
+gridpath_run_data_toolkit --settings_csv PATH/TO/SETTINGS
+```
+
+See the *Using the GridPath Data Toolkit* section of the GridPath documentation.
diff --git a/data_toolkit/__init__.py b/data_toolkit/__init__.py
@@ -0,0 +1,8 @@
+"""
+The **GridPath Data Toolkit** provides functionality to create GridPath scenario
+inputs from raw data. The user may provide their own data and use the
+Toolkit to convert the data to GridPath CSV input format for use in building a
+GridPath database. The Toolkit also includes functionality to download raw data
+from `PUDL <https://catalyst.coop/pudl/>`__ and from the
+`GridPath RA Toolkit <https://gridlab.org/gridpathratoolkit/>`__.
+"""
diff --git a/data_toolkit/common_methods.py b/data_toolkit/common_methods.py
@@ -0,0 +1,38 @@
+# Copyright 2016-2025 Blue Marble Analytics LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Common methods for the GridPath Data Toolkit.
+"""
+
+import os.path
+
+
+def create_csv_generic(
+    filename,
+    df,
+    overwrite,
+):
+    """
+    :param filename: path of the CSV file to write
+    :param df: the object whose ``to_csv`` method is called to write the file
+        (presumably a pandas DataFrame -- TODO confirm against callers)
+    :param overwrite: if truthy, write the file even if it already exists
+    :raises ValueError: if the file already exists and ``overwrite`` is falsy
+
+    Write ``df`` to ``filename`` as a CSV without the index, refusing to
+    replace an existing file unless ``overwrite`` is set.
+    """
+    # Write when the file is new or when the caller has allowed overwriting
+    if not os.path.exists(filename) or overwrite:
+        df.to_csv(
+            filename,
+            mode="w",
+            index=False,
+        )
+    else:
+        raise ValueError(
+            f"The file {filename} already exists and overwrite has not been "
+            "indicated."
+        )
diff --git a/db/utilities/ra_toolkit/__init__.py → data_toolkit/fuels/__init__.py b/db/utilities/ra_toolkit/__init__.py → data_toolkit/fuels/__init__.py
diff --git a/data_toolkit/fuels/eiaaeo_fuel_price_input_csvs.py b/data_toolkit/fuels/eiaaeo_fuel_price_input_csvs.py
@@ -0,0 +1,151 @@
+# Copyright 2016-2024 Blue Marble Analytics LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+EIA AEO Fuel Prices
+*******************
+
+Create GridPath fuel price inputs (fuel_scenario_id) based on the EIA AEO.
+
+.. warning:: The user is responsible for ensuring that all prices and costs in
+    their model are in a consistent real currency year.
+
+=====
+Usage
+=====
+
+>>> gridpath_run_data_toolkit --single_step eiaaeo_fuel_price_input_csvs --settings_csv PATH/TO/SETTINGS/CSV
+
+===================
+Input prerequisites
+===================
+
+This module assumes the following raw input database tables have been
+populated:
+    * raw_data_eiaaeo_fuel_prices
+    * user_defined_eiaaeo_region_key
+    * user_defined_eia_gridpath_key
+
+=========
+Settings
+=========
+    * database
+    * output_directory
+    * model_case
+    * report_year
+    * fuel_price_id
+
+"""
+
+import csv
+from argparse import ArgumentParser
+import os.path
+import pandas as pd
+import sys
+
+from db.common_functions import connect_to_database
+
+
+def parse_arguments(args):
+    """
+    :param args: the script arguments specified by the user
+    :return: the parsed known argument values (<class 'argparse.Namespace'>
+    Python object)
+
+    Parse the known arguments. Arguments the parser does not recognize are
+    ignored rather than raising an error.
+    """
+    parser = ArgumentParser(add_help=True)
+
+    # The default database and output directory are relative paths
+    parser.add_argument("-db", "--database", default="../../db/open_data_raw.db")
+
+    parser.add_argument(
+        "-o",
+        "--output_directory",
+        default="../../db/csvs_open_data/fuels/fuel_prices",
+    )
+    # No type= is given for the ID and year options, so a value supplied on
+    # the command line arrives as a string while the defaults are ints
+    parser.add_argument("-fuel_price_id", "--fuel_price_scenario_id", default=1)
+    parser.add_argument(
+        "-case",
+        "--model_case",
+        default="aeo2022",
+    )
+    parser.add_argument("-r_yr", "--report_year", default=2023)
+
+    parser.add_argument("-q", "--quiet", default=False, action="store_true")
+
+    # parse_known_args returns (namespace, leftover_args); only the namespace
+    # is kept
+    parsed_arguments = parser.parse_known_args(args=args)[0]
+
+    return parsed_arguments
+
+
+def get_fuel_prices(
+    conn, output_directory, subscenario_id, subscenario_name, report_year, model_case
+):
+    """
+    :param conn: the database connection passed to ``pd.read_sql``
+    :param output_directory: directory in which the CSV file is written
+    :param subscenario_id: first part of the output file name
+    :param subscenario_name: second part of the output file name
+    :param report_year: value matched against the ``report_year`` column
+    :param model_case: value matched against the ``model_case_eiaaeo`` column
+
+    Query the EIA AEO fuel prices for the given report year and model case,
+    repeat every (fuel, period) price for each of the 12 months, and write
+    the result to ``<subscenario_id>_<subscenario_name>.csv`` in the output
+    directory. The output columns are fuel, period, month, and
+    fuel_price_per_mmbtu, where fuel is the GridPath generic fuel and the
+    fuel region joined by an underscore.
+
+    NOTE(review): ``report_year`` and ``model_case`` are interpolated
+    directly into the SQL text rather than passed as bound parameters, so a
+    value containing a quote character will break the query.
+    """
+
+    sql = f"""
+    SELECT gridpath_generic_fuel || '_' || fuel_region as fuel, projection_year as period, 
+    fuel_cost_real_per_mmbtu_eiaaeo as fuel_price_per_mmbtu
+    FROM raw_data_eiaaeo_fuel_prices
+    JOIN (SELECT DISTINCT gridpath_generic_fuel, fuel_type_eiaaeo FROM user_defined_eia_gridpath_key) USING (fuel_type_eiaaeo)
+    JOIN user_defined_eiaaeo_region_key using (
+    electricity_market_module_region_eiaaeo)
+    WHERE report_year = {report_year}
+    AND model_case_eiaaeo = '{model_case}'
+    ORDER BY fuel, period
+    """
+
+    df = pd.read_sql(sql, conn)
+    month_df_list = []
+    # The price returned by the query is repeated unchanged for months 1-12
+    for month in range(1, 13):
+        # This is an alias, not a copy: the "month" column is set on df
+        # itself on every pass
+        month_df = df
+        month_df["month"] = month
+        # Move "month" (position 3) ahead of the price column (position 2)
+        cols = month_df.columns.tolist()
+        cols = cols[:2] + [cols[3]] + [cols[2]]
+        # Selecting a list of columns yields a new DataFrame, so the frame
+        # appended below keeps this pass's month value
+        month_df = month_df[cols]
+
+        month_df_list.append(month_df)
+
+    final_df = pd.concat(month_df_list)
+
+    # Rows are written month by month in the order they were concatenated
+    final_df.to_csv(
+        os.path.join(output_directory, f"{subscenario_id}_" f"{subscenario_name}.csv"),
+        index=False,
+    )
+
+
+def main(args=None):
+    """
+    :param args: list of script arguments; if None, ``sys.argv[1:]`` is used
+
+    Parse the arguments, make sure the output directory exists, connect to
+    the database, and write the fuel price CSV file.
+    """
+    if args is None:
+        args = sys.argv[1:]
+
+    parsed_args = parse_arguments(args=args)
+
+    if not parsed_args.quiet:
+        print("Creating fuel prices...")
+
+    os.makedirs(parsed_args.output_directory, exist_ok=True)
+
+    # NOTE(review): the connection is not explicitly closed in this function
+    conn = connect_to_database(db_path=parsed_args.database)
+
+    # The model case is used both as the query filter and as the subscenario
+    # name in the output file name
+    get_fuel_prices(
+        conn=conn,
+        output_directory=parsed_args.output_directory,
+        subscenario_id=parsed_args.fuel_price_scenario_id,
+        subscenario_name=parsed_args.model_case,
+        report_year=parsed_args.report_year,
+        model_case=parsed_args.model_case,
+    )
+
+
+# Run only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()