Skip to content

Commit

Permalink
update geo filter
Browse files Browse the repository at this point in the history
  • Loading branch information
trevorb1 committed Dec 5, 2024
1 parent ae450bb commit b55ccd4
Show file tree
Hide file tree
Showing 2 changed files with 181 additions and 127 deletions.
11 changes: 6 additions & 5 deletions workflow/rules/model.smk
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@ rule geographic_filter:
csv_files = expand('results/data/{csv}.csv', csv = OTOOLE_PARAMS),
params:
geographic_scope = config['geographic_scope'],
res_targets = config['re_targets']
res_targets = config['re_targets'],
nodes_to_remove = config["nodes_to_remove"],
in_dir = "results/data",
out_dir = "results/{scenario}/data"
output:
csv_files = expand('results/{{scenario}}/data/{csv}.csv', csv = OTOOLE_PARAMS),
# conda:
# '../envs/data_processing.yaml'
log:
log = 'results/{scenario}/logs/geographicFilter.log'
shell:
'python workflow/scripts/osemosys_global/geographic_filter.py 2> {log}'
script:
'../scripts/osemosys_global/geographic_filter.py'

rule copy_otoole_confg:
message:
Expand Down
297 changes: 175 additions & 122 deletions workflow/scripts/osemosys_global/geographic_filter.py
Original file line number Diff line number Diff line change
@@ -1,125 +1,178 @@
# # Filter osemosys_global datapackaged based on user-defined geographic scope
# Filter the osemosys_global datapackage based on a user-defined geographic scope

import pandas as pd
import os
from typing import Optional
from pathlib import Path
from configuration import ConfigFile, ConfigPaths
import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

# CONFIGURATION PARAMETERS

# Legacy script body (the side of the diff that this commit removes).
# It reads the run configuration, then copies every CSV found in
# output_data_dir into scenario_data_dir, filtering rows by geographic scope.
# NOTE(review): leading indentation is missing in this capture, so the block
# nesting of the statements below is not shown here -- confirm it against the
# original file before reusing any of this code.
config_paths = ConfigPaths()
config = ConfigFile('config')

scenario_name = config.get('scenario')
geographic_scope = config.get('geographic_scope')
remove_nodes = config.get('nodes_to_remove')

# Only the keys of the 're_targets' mapping are kept; they are later matched
# against FUEL names so that those rows survive the filter.
res_targets = config.get('re_targets')
if res_targets is not None:
res_targets = list(res_targets.keys())

output_data_dir = config_paths.output_data_dir
scenario_dir = config_paths.scenario_dir
scenario_data_dir = config_paths.scenario_data_dir

# FILTERING

if not geographic_scope: # Check for empty list (ie. World run)
geographic_scope = []
geographic_scope.append('INT') # 'INT' for international fuels added by default
# Fuel names that are kept regardless of the geographic scope.
international_fuels = ['COA', 'COG', 'GAS', 'OIL', 'PET', 'OTH', 'URN']

if not os.path.exists(scenario_data_dir):
os.makedirs(scenario_data_dir)

for each_csv in Path(output_data_dir).glob('*.csv'):
# NOTE(review): each_csv produced by glob() already includes output_data_dir;
# joining it again is only harmless when that directory is absolute -- confirm.
df = pd.read_csv(os.path.join(output_data_dir, each_csv))

if not df.empty:
# Do not filter if only element is international fuels
if geographic_scope[0] != 'INT':
# Codes are matched purely by character position: 3-character slices are
# compared with geographic_scope and 5-character slices with remove_nodes.
if 'TECHNOLOGY' in df.columns:
df = df.loc[df['TECHNOLOGY'].str[3:6].isin(geographic_scope) |
df['TECHNOLOGY'].str[6:9].isin(geographic_scope) |
df['TECHNOLOGY'].str[8:11].isin(geographic_scope)]

# Filter out all international TRN techs
# (a 'TRN' row is dropped unless both its [3:6] and [8:11] slices are in scope)
df = df.loc[~(
df['TECHNOLOGY'].str.startswith('TRN') &
(~(df['TECHNOLOGY'].str[3:6].isin(geographic_scope)) |
~(df['TECHNOLOGY'].str[8:11].isin(geographic_scope)))
)]

if remove_nodes:
df = df.loc[~(df['TECHNOLOGY'].str[3:8].isin(remove_nodes) |
df['TECHNOLOGY'].str[6:11].isin(remove_nodes) |
df['TECHNOLOGY'].str[8:13].isin(remove_nodes))]

if 'STORAGE' in df.columns:
df = df.loc[df['STORAGE'].str[3:6].isin(geographic_scope) |
df['STORAGE'].str[6:9].isin(geographic_scope) |
df['STORAGE'].str[8:11].isin(geographic_scope)]

if remove_nodes:
df = df.loc[~(df['STORAGE'].str[3:8].isin(remove_nodes) |
df['STORAGE'].str[6:11].isin(remove_nodes) |
df['STORAGE'].str[8:13].isin(remove_nodes))]

# FUEL rows are also kept when the whole name is an international fuel or,
# if configured, one of the res_targets names.
if 'FUEL' in df.columns:
if res_targets is None:
df = df.loc[df['FUEL'].str[3:6].isin(geographic_scope) |
df['FUEL'].str[6:9].isin(geographic_scope) |
df['FUEL'].isin(international_fuels)]

else:
df = df.loc[df['FUEL'].str[3:6].isin(geographic_scope) |
df['FUEL'].str[6:9].isin(geographic_scope) |
df['FUEL'].isin(res_targets) |
df['FUEL'].isin(international_fuels)]

if remove_nodes:
df = df.loc[~(df['FUEL'].str[3:8].isin(remove_nodes) |
df['FUEL'].str[6:11].isin(remove_nodes))]

# The files FUEL.csv, TECHNOLOGY.csv and STORAGE.csv are filtered on their
# 'VALUE' column instead of a column named after the set.
# NOTE(review): splitting on '/' assumes POSIX path separators; each_csv.name
# would give the file name independent of the separator.
if str(each_csv).split('/')[-1] == 'FUEL.csv':
if res_targets is None:
df = df.loc[df['VALUE'].str[3:6].isin(geographic_scope) |
df['VALUE'].str[6:9].isin(geographic_scope) |
df['VALUE'].isin(international_fuels)]

else:
df = df.loc[df['VALUE'].str[3:6].isin(geographic_scope) |
df['VALUE'].str[6:9].isin(geographic_scope) |
df['VALUE'].isin(res_targets) |
df['VALUE'].isin(international_fuels)]

if remove_nodes:
df = df.loc[~(df['VALUE'].str[3:8].isin(remove_nodes) |
df['VALUE'].str[6:11].isin(remove_nodes))]

if str(each_csv).split('/')[-1] == 'TECHNOLOGY.csv':
df = df.loc[df['VALUE'].str[3:6].isin(geographic_scope) |
df['VALUE'].str[6:9].isin(geographic_scope) |
df['VALUE'].str[8:11].isin(geographic_scope)]
df = df.loc[~(
df['VALUE'].str.startswith('TRN') &
(~(df['VALUE'].str[3:6].isin(geographic_scope)) |
~(df['VALUE'].str[8:11].isin(geographic_scope)))
)]

if remove_nodes:
df = df.loc[~(df['VALUE'].str[3:8].isin(remove_nodes) |
df['VALUE'].str[6:11].isin(remove_nodes) |
df['VALUE'].str[8:13].isin(remove_nodes))]

# STORAGE.csv values are checked at offset 3 only.
if str(each_csv).split('/')[-1] == 'STORAGE.csv':
df = df.loc[df['VALUE'].str[3:6].isin(geographic_scope)]

if remove_nodes:
df = df.loc[~df['VALUE'].str[3:8].isin(remove_nodes)]

# The (possibly filtered) table is written under the same file name in the
# scenario data directory.
df.to_csv(os.path.join(os.path.join(scenario_data_dir, each_csv.name)), index = None)

logging.info('Geographic Filter Applied')
import logging

logger = logging.getLogger(__name__)

# Fuel names that are kept regardless of the geographic scope.
INT_FUELS = ["COA", "COG", "GAS", "OIL", "PET", "OTH", "URN"]


def _slice_mask(codes: pd.Series, starts: tuple[int, ...], width: int, allowed) -> pd.Series:
    """Flag rows where any ``width``-character slice of ``codes`` is in ``allowed``.

    One slice is taken at every offset in ``starts`` (which must not be
    empty) and the per-offset results are OR-ed together.
    """
    mask = None
    for start in starts:
        hit = codes.str[start : start + width].isin(allowed)
        mask = hit if mask is None else mask | hit
    return mask


def _filter_codes(
    df: pd.DataFrame,
    col: str,
    geo_scope: list[str],
    remove_nodes: list[str],
    starts: tuple[int, ...],
    also_keep=(),
) -> pd.DataFrame:
    """Keep rows of ``col`` that are in scope (or named in ``also_keep``), then drop removed nodes."""
    codes = df[col]
    keep = _slice_mask(codes, starts, 3, geo_scope)
    if also_keep:
        keep = keep | codes.isin(also_keep)
    df = df.loc[keep]

    if remove_nodes:
        df = df.loc[~_slice_mask(df[col], starts, 5, remove_nodes)]
    return df


def _drop_cross_border_trn(df: pd.DataFrame, col: str, geo_scope: list[str]) -> pd.DataFrame:
    """Drop ``TRN`` rows of ``col`` unless both the [3:6] and [8:11] codes are in scope."""
    codes = df[col]
    both_in_scope = codes.str[3:6].isin(geo_scope) & codes.str[8:11].isin(geo_scope)
    return df.loc[~(codes.str.startswith("TRN") & ~both_in_scope)]


def filer(
    df: pd.DataFrame,
    name: str,
    geo_scope: list[str],
    remove_nodes: list[str],
    res_targets: Optional[dict[str, list]] = None,
) -> pd.DataFrame:
    """Filter one parameter/set table down to the requested geographic scope.

    Codes are matched purely by character position: 3-character slices are
    compared with ``geo_scope`` and 5-character slices with ``remove_nodes``.

    Args:
        df: Table to filter. The columns ``TECHNOLOGY``, ``STORAGE`` and
            ``FUEL`` are filtered when present.
        name: Name of the table (file stem). For ``"FUEL"``,
            ``"TECHNOLOGY"`` and ``"STORAGE"`` the ``VALUE`` column is
            filtered as well.
        geo_scope: 3-character codes to keep (e.g. ``"IND"``). If the only
            entry is ``"INT"`` the table is returned unfiltered.
        remove_nodes: 5-character codes whose rows are dropped; may be
            empty or ``None``.
        res_targets: Optional mapping whose keys are additional fuel names
            to keep.

    Returns:
        The filtered table. ``df`` itself is returned when it is empty or
        when no filtering applies.
    """
    if df.empty:
        return df

    # Do not filter if only element is international fuels
    if len(geo_scope) == 1 and geo_scope[0] == "INT":
        return df

    # Fuel names kept by exact match: international fuels plus, when given,
    # the names (keys) of the configured targets.
    named_fuels = INT_FUELS if res_targets is None else INT_FUELS + list(res_targets)

    if "TECHNOLOGY" in df.columns:
        df = _filter_codes(df, "TECHNOLOGY", geo_scope, remove_nodes, (3, 6, 8))
        # Filter out all international TRN techs
        df = _drop_cross_border_trn(df, "TECHNOLOGY", geo_scope)

    if "STORAGE" in df.columns:
        df = _filter_codes(df, "STORAGE", geo_scope, remove_nodes, (3, 6, 8))

    if "FUEL" in df.columns:
        df = _filter_codes(df, "FUEL", geo_scope, remove_nodes, (3, 6), named_fuels)

    # Set definition tables carry their members in the VALUE column.
    if name == "FUEL":
        df = _filter_codes(df, "VALUE", geo_scope, remove_nodes, (3, 6), named_fuels)

    if name == "TECHNOLOGY":
        df = _filter_codes(df, "VALUE", geo_scope, remove_nodes, (3, 6, 8))
        df = _drop_cross_border_trn(df, "VALUE", geo_scope)

    if name == "STORAGE":
        df = _filter_codes(df, "VALUE", geo_scope, remove_nodes, (3,))

    return df


if __name__ == "__main__":
    # Entry point: inputs come from the Snakemake rule when run through the
    # `script:` directive, otherwise from the hard-coded debug values below.

    if "snakemake" in globals():
        geographic_scope = snakemake.params.geographic_scope
        res_targets = snakemake.params.res_targets
        nodes_to_remove = snakemake.params.nodes_to_remove
        in_dir = snakemake.params.in_dir
        out_dir = snakemake.params.out_dir
    else:
        geographic_scope = ["IND"]
        res_targets = {"T01": ["", [], "PCT", 2048, 2050, 95]}
        nodes_to_remove = []
        in_dir = "results/data"
        out_dir = "results/data/Wrong/data"

    # An empty or missing scope means a world run: treat it as an empty list so
    # that only "INT" remains and `filer` returns every table unfiltered.
    # Copying also avoids mutating the list owned by the Snakemake params.
    geographic_scope = list(geographic_scope or [])
    geographic_scope.append("INT")  # for international fuels added by default

    Path(out_dir).mkdir(parents=True, exist_ok=True)

    # Every input CSV is written to the output directory under the same file
    # name, filtered or not.
    for each_csv in Path(in_dir).glob("*.csv"):
        df = pd.read_csv(each_csv)
        df = filer(df, each_csv.stem, geographic_scope, nodes_to_remove, res_targets)
        df.to_csv(Path(out_dir, each_csv.name), index=False)

    logging.info("Geographic Filter Applied")

0 comments on commit b55ccd4

Please sign in to comment.