From b55ccd432bd4473dd6edb8d25f03848929fa1851 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Thu, 5 Dec 2024 13:07:57 -0800 Subject: [PATCH] update geo filter --- workflow/rules/model.smk | 11 +- .../osemosys_global/geographic_filter.py | 297 +++++++++++------- 2 files changed, 181 insertions(+), 127 deletions(-) diff --git a/workflow/rules/model.smk b/workflow/rules/model.smk index ff75704a..b4b46616 100644 --- a/workflow/rules/model.smk +++ b/workflow/rules/model.smk @@ -10,15 +10,16 @@ rule geographic_filter: csv_files = expand('results/data/{csv}.csv', csv = OTOOLE_PARAMS), params: geographic_scope = config['geographic_scope'], - res_targets = config['re_targets'] + res_targets = config['re_targets'], + nodes_to_remove = config["nodes_to_remove"], + in_dir = "results/data", + out_dir = "results/{scenario}/data" output: csv_files = expand('results/{{scenario}}/data/{csv}.csv', csv = OTOOLE_PARAMS), - # conda: - # '../envs/data_processing.yaml' log: log = 'results/{scenario}/logs/geographicFilter.log' - shell: - 'python workflow/scripts/osemosys_global/geographic_filter.py 2> {log}' + script: + '../scripts/osemosys_global/geographic_filter.py' rule copy_otoole_confg: message: diff --git a/workflow/scripts/osemosys_global/geographic_filter.py b/workflow/scripts/osemosys_global/geographic_filter.py index 546dc20b..bcde1b57 100644 --- a/workflow/scripts/osemosys_global/geographic_filter.py +++ b/workflow/scripts/osemosys_global/geographic_filter.py @@ -1,125 +1,178 @@ -# # Filter osemosys_global datapackaged based on user-defined geographic scope +# Filter osemosys_global datapackaged based on user-defined geographic scope import pandas as pd -import os +from typing import Optional from pathlib import Path -from configuration import ConfigFile, ConfigPaths -import logging -logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) - -# CONFIGURATION PARAMETERS - -config_paths = ConfigPaths() -config = ConfigFile('config') - -scenario_name = 
config.get('scenario') -geographic_scope = config.get('geographic_scope') -remove_nodes = config.get('nodes_to_remove') - -res_targets = config.get('re_targets') -if res_targets is not None: - res_targets = list(res_targets.keys()) - -output_data_dir = config_paths.output_data_dir -scenario_dir = config_paths.scenario_dir -scenario_data_dir = config_paths.scenario_data_dir - -# FILTERING - -if not geographic_scope: # Check for empty list (ie. World run) - geographic_scope = [] -geographic_scope.append('INT') # 'INT' for international fuels added by default -international_fuels = ['COA', 'COG', 'GAS', 'OIL', 'PET', 'OTH', 'URN'] - -if not os.path.exists(scenario_data_dir): - os.makedirs(scenario_data_dir) - -for each_csv in Path(output_data_dir).glob('*.csv'): - df = pd.read_csv(os.path.join(output_data_dir, each_csv)) - - if not df.empty: - # Do not filter if only element is international fuels - if geographic_scope[0] != 'INT': - if 'TECHNOLOGY' in df.columns: - df = df.loc[df['TECHNOLOGY'].str[3:6].isin(geographic_scope) | - df['TECHNOLOGY'].str[6:9].isin(geographic_scope) | - df['TECHNOLOGY'].str[8:11].isin(geographic_scope)] - - # Filter out all international TRN techs - df = df.loc[~( - df['TECHNOLOGY'].str.startswith('TRN') & - (~(df['TECHNOLOGY'].str[3:6].isin(geographic_scope)) | - ~(df['TECHNOLOGY'].str[8:11].isin(geographic_scope))) - )] - - if remove_nodes: - df = df.loc[~(df['TECHNOLOGY'].str[3:8].isin(remove_nodes) | - df['TECHNOLOGY'].str[6:11].isin(remove_nodes) | - df['TECHNOLOGY'].str[8:13].isin(remove_nodes))] - - if 'STORAGE' in df.columns: - df = df.loc[df['STORAGE'].str[3:6].isin(geographic_scope) | - df['STORAGE'].str[6:9].isin(geographic_scope) | - df['STORAGE'].str[8:11].isin(geographic_scope)] - - if remove_nodes: - df = df.loc[~(df['STORAGE'].str[3:8].isin(remove_nodes) | - df['STORAGE'].str[6:11].isin(remove_nodes) | - df['STORAGE'].str[8:13].isin(remove_nodes))] - - if 'FUEL' in df.columns: - if res_targets is None: - df = 
df.loc[df['FUEL'].str[3:6].isin(geographic_scope) | - df['FUEL'].str[6:9].isin(geographic_scope) | - df['FUEL'].isin(international_fuels)] - - else: - df = df.loc[df['FUEL'].str[3:6].isin(geographic_scope) | - df['FUEL'].str[6:9].isin(geographic_scope) | - df['FUEL'].isin(res_targets) | - df['FUEL'].isin(international_fuels)] - - if remove_nodes: - df = df.loc[~(df['FUEL'].str[3:8].isin(remove_nodes) | - df['FUEL'].str[6:11].isin(remove_nodes))] - - if str(each_csv).split('/')[-1] == 'FUEL.csv': - if res_targets is None: - df = df.loc[df['VALUE'].str[3:6].isin(geographic_scope) | - df['VALUE'].str[6:9].isin(geographic_scope) | - df['VALUE'].isin(international_fuels)] - - else: - df = df.loc[df['VALUE'].str[3:6].isin(geographic_scope) | - df['VALUE'].str[6:9].isin(geographic_scope) | - df['VALUE'].isin(res_targets) | - df['VALUE'].isin(international_fuels)] - - if remove_nodes: - df = df.loc[~(df['VALUE'].str[3:8].isin(remove_nodes) | - df['VALUE'].str[6:11].isin(remove_nodes))] - - if str(each_csv).split('/')[-1] == 'TECHNOLOGY.csv': - df = df.loc[df['VALUE'].str[3:6].isin(geographic_scope) | - df['VALUE'].str[6:9].isin(geographic_scope) | - df['VALUE'].str[8:11].isin(geographic_scope)] - df = df.loc[~( - df['VALUE'].str.startswith('TRN') & - (~(df['VALUE'].str[3:6].isin(geographic_scope)) | - ~(df['VALUE'].str[8:11].isin(geographic_scope))) - )] - - if remove_nodes: - df = df.loc[~(df['VALUE'].str[3:8].isin(remove_nodes) | - df['VALUE'].str[6:11].isin(remove_nodes) | - df['VALUE'].str[8:13].isin(remove_nodes))] - - if str(each_csv).split('/')[-1] == 'STORAGE.csv': - df = df.loc[df['VALUE'].str[3:6].isin(geographic_scope)] - - if remove_nodes: - df = df.loc[~df['VALUE'].str[3:8].isin(remove_nodes)] - - df.to_csv(os.path.join(os.path.join(scenario_data_dir, each_csv.name)), index = None) - -logging.info('Geographic Filter Applied') \ No newline at end of file +import logging + +logger = logging.getLogger(__name__) + +INT_FUELS = ["COA", "COG", "GAS", "OIL", "PET", 
# Fuel names that are kept by the FUEL filters regardless of geographic scope.
INT_FUELS = ["COA", "COG", "GAS", "OIL", "PET", "OTH", "URN"]


def _matches_any_slice(
    codes: pd.Series,
    starts: tuple[int, ...],
    width: int,
    allowed: list[str],
) -> pd.Series:
    """Return a mask that is True where any fixed-width slice is in *allowed*.

    One slice ``codes.str[start:start + width]`` is tested per entry of
    *starts*; the per-slice results are OR-ed together.
    """
    mask = codes.str[starts[0] : starts[0] + width].isin(allowed)
    for start in starts[1:]:
        mask = mask | codes.str[start : start + width].isin(allowed)
    return mask


def _filter_codes(
    df: pd.DataFrame,
    column: str,
    geo_scope: list[str],
    remove_nodes: list[str],
    starts: tuple[int, ...],
    always_keep: Optional[list[str]] = None,
    paired_trn: bool = False,
) -> pd.DataFrame:
    """Filter the rows of *df* on the name strings held in *column*.

    Args:
        df: Frame to filter.
        column: Column holding the names to inspect.
        geo_scope: Allowed 3-character codes.
        remove_nodes: 5-character codes to drop; skipped when empty.
        starts: Offsets at which the 3- and 5-character slices begin.
        always_keep: Exact names kept regardless of their slices.
        paired_trn: When True, names starting with "TRN" are kept only if
            the slices ``[3:6]`` and ``[8:11]`` are *both* in *geo_scope*.

    Returns:
        The filtered frame.
    """
    codes = df[column]
    keep = _matches_any_slice(codes, starts, 3, geo_scope)
    if always_keep is not None:
        keep = keep | codes.isin(always_keep)
    df = df.loc[keep]

    if paired_trn:
        # Filter out all international TRN techs: a "TRN" name with only one
        # of its two codes in scope passed the any-slice test above.
        codes = df[column]
        both_in_scope = codes.str[3:6].isin(geo_scope) & codes.str[8:11].isin(
            geo_scope
        )
        df = df.loc[~(codes.str.startswith("TRN") & ~both_in_scope)]

    if remove_nodes:
        df = df.loc[~_matches_any_slice(df[column], starts, 5, remove_nodes)]

    return df


def filer(
    df: pd.DataFrame,
    name: str,
    geo_scope: list[str],
    remove_nodes: list[str],
    res_targets: Optional[dict[str, list]] = None,
) -> pd.DataFrame:
    """Filter one parameter/set table down to the requested geographic scope.

    Rows are selected by slicing fixed character positions out of the names
    in the ``TECHNOLOGY``, ``STORAGE`` and ``FUEL`` columns (or the ``VALUE``
    column when *name* is one of those three) and testing the slices against
    *geo_scope* and *remove_nodes*.

    Args:
        df: Table read from ``<name>.csv``.
        name: File stem of the table; "FUEL", "TECHNOLOGY" and "STORAGE"
            trigger filtering of the ``VALUE`` column.
        geo_scope: 3-character codes to keep. The driver appends "INT".
        remove_nodes: 5-character codes to drop; may be empty.
        res_targets: Optional mapping whose keys are extra fuel names to
            keep alongside ``INT_FUELS``.

    Returns:
        The filtered table. *df* is returned untouched when it is empty or
        when *geo_scope* is exactly ``["INT"]``.
    """
    if df.empty:
        return df

    # Do not filter if only element is international fuels
    if len(geo_scope) == 1 and geo_scope[0] == "INT":
        return df

    # Only the keys of res_targets are fuel names; make that explicit rather
    # than relying on how ``isin`` iterates a dict.
    fuels_to_keep = INT_FUELS + list(res_targets or [])

    if "TECHNOLOGY" in df.columns:
        df = _filter_codes(
            df, "TECHNOLOGY", geo_scope, remove_nodes, (3, 6, 8), paired_trn=True
        )

    if "STORAGE" in df.columns:
        df = _filter_codes(df, "STORAGE", geo_scope, remove_nodes, (3, 6, 8))

    if "FUEL" in df.columns:
        df = _filter_codes(
            df, "FUEL", geo_scope, remove_nodes, (3, 6), always_keep=fuels_to_keep
        )

    if name == "FUEL":
        df = _filter_codes(
            df, "VALUE", geo_scope, remove_nodes, (3, 6), always_keep=fuels_to_keep
        )

    if name == "TECHNOLOGY":
        df = _filter_codes(
            df, "VALUE", geo_scope, remove_nodes, (3, 6, 8), paired_trn=True
        )

    if name == "STORAGE":
        # The STORAGE set is matched on its first code position only.
        df = _filter_codes(df, "VALUE", geo_scope, remove_nodes, (3,))

    return df


if __name__ == "__main__":

    if "snakemake" in globals():
        geographic_scope = snakemake.params.geographic_scope
        res_targets = snakemake.params.res_targets
        nodes_to_remove = snakemake.params.nodes_to_remove
        in_dir = snakemake.params.in_dir
        out_dir = snakemake.params.out_dir
    else:
        geographic_scope = ["IND"]
        res_targets = {"T01": ["", [], "PCT", 2048, 2050, 95]}
        nodes_to_remove = []
        in_dir = "results/data"
        out_dir = "results/data/Wrong/data"

    # An empty/None scope means no geographic restriction. Copy the list so
    # the configuration object is not mutated by the append below.
    geographic_scope = list(geographic_scope or [])
    geographic_scope.append("INT")  # for international fuels added by default

    Path(out_dir).mkdir(parents=True, exist_ok=True)

    for each_csv in Path(in_dir).glob("*.csv"):
        df = pd.read_csv(each_csv)
        df = filer(df, each_csv.stem, geographic_scope, nodes_to_remove, res_targets)
        df.to_csv(Path(out_dir, each_csv.name), index=False)

    logger.info("Geographic Filter Applied")