Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Plymouth analysis #118

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,331 @@
"""
Combine, filter and process the HP suitability data for Plymouth
"""

import pandas as pd
import polars as pl
import geojson
import s3fs

fs = s3fs.S3FileSystem()
import geopandas as gpd

from asf_heat_pump_suitability.getters import get_target
from asf_heat_pump_suitability.getters import get_datasets
from asf_heat_pump_suitability import config

# Suitability data per LSOA
suitablitity_per_lsoa_file = "s3://asf-heat-pump-suitability/outputs/2023Q4/suitability/20250114_2023_Q4_heat_pump_suitability_per_lsoa.parquet"

# The suitability of properties
suitability_per_property_file = "s3://asf-heat-pump-suitability/outputs/2023Q4/suitability/20250115_2023_Q4_heat_pump_suitability_per_property.parquet"

# Garden size per property
garden_size_file = "s3://asf-heat-pump-suitability/outputs/2023Q4/gardens/20250114_2023_Q4_EPC_garden_size_estimates_EWS_deduplicated.parquet"

column_rename_dict = {
"ASHP_S_avg_score_weighted": "ASHP - Conventional",
"ASHP_N_avg_score_weighted": "ASHP - Nesta",
"GSHP_S_avg_score_weighted": "GSHP - Conventional",
"GSHP_N_avg_score_weighted": "GSHP - Nesta",
"SGL_S_avg_score_weighted": "SGL - Conventional",
"SGL_N_avg_score_weighted": "SGL - Nesta",
"HN_S_avg_score_weighted": "HN - Conventional",
"HN_N_avg_score_weighted": "HN - Nesta",
}


def load_ew_boundaries(df: pd.DataFrame) -> gpd.GeoDataFrame:

gdf = get_datasets.load_gdf_ons_lsoa_bounds()
gdf = gdf.to_crs(epsg=4326) # convert to WGS84

gdf = gdf.merge(df, how="right", left_on="LSOA21CD", right_on="lsoa")

return gdf


def get_tech_suitability_overall_4_type(
ashp,
gshp,
sgl,
hn,
ashp_high_thresh,
gshp_high_thresh,
sgl_high_thresh,
hn_high_thresh,
):
if (ashp >= ashp_high_thresh) | (gshp >= gshp_high_thresh):
ind_suit = True
else:
ind_suit = False
if (sgl >= sgl_high_thresh) | (hn >= hn_high_thresh):
shared_suit = True
else:
shared_suit = False

if ind_suit & shared_suit:
return "Both individual and shared suitable"
elif ind_suit:
return "Individual suitable only"
elif shared_suit:
return "Shared suitable only"
else:
return "Neither suitable"


def get_tech_suitability_overall_type(ashp, hn, ashp_high_thresh, hn_high_thresh):
if ashp >= ashp_high_thresh:
ind_suit = True
else:
ind_suit = False
if hn >= hn_high_thresh:
shared_suit = True
else:
shared_suit = False

if ind_suit & shared_suit:
return "Both ASHP and HN suitable"
elif ind_suit:
return "ASHP suitable only"
elif shared_suit:
return "HN suitable only"
else:
return "Neither ASHP or HN suitable"


# Ad hoc manually created categories for Plymouth
def get_tech_suitability_manual(ashp, hn):
if (ashp < 0.68) & (hn > 0.7):
return "HN suitable"
else:
if ashp < 0.82:
return "Multiple technologies may be feasible"
else:
return "ASHP suitable"
Comment on lines +102 to +105
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may be misinterpreting the use case of this function but is there an outcome missing where none is suitable?

E.g. it looks like if ashp == 0 and hn == 0 for example, the function would return "Multiple technologies may be feasible" even though none are.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know what you mean. I think the language is because we don't want to say that HPs are never suitable. And either way, for Plymouth (and I think most places) these values are never that low anyway.



def add_tenure_proportions(lsoa_df: pd.DataFrame) -> pd.DataFrame:

# Join with tenure data
census_tenure = get_target.transform_df_target_tenure()
lsoa_df_tenure = lsoa_df.merge(census_tenure.to_pandas(), how="left", on="lsoa")

# Get proportions of each tenure type
tenure_types = ["owner-occupied", "rental (social)", "rental (private)"]
lsoa_df_tenure["total_census_tenure_properties"] = lsoa_df_tenure[tenure_types].sum(
axis=1
)
for tenure_type in tenure_types:
lsoa_df_tenure[f"Proportion {tenure_type}"] = (
lsoa_df_tenure[tenure_type]
/ lsoa_df_tenure["total_census_tenure_properties"]
)

return lsoa_df_tenure


def get_lsoa_proportion_flats(per_property_df: pd.DataFrame) -> pd.DataFrame:

# Proportion of flats per LSOA
per_property_df["is_flat"] = (
per_property_df["property_type"] == "Flat, maisonette or apartment"
).astype(int)
per_property_df_grouped = per_property_df.groupby("lsoa")
num_flats_per_lsoa = per_property_df_grouped["is_flat"].sum().reset_index()
num_props_per_lsoa = per_property_df_grouped.size().reset_index()
flats_per_lsoa = num_flats_per_lsoa.merge(num_props_per_lsoa, on="lsoa")
flats_per_lsoa["%flats"] = flats_per_lsoa["is_flat"] * 100 / flats_per_lsoa[0]

return flats_per_lsoa


if __name__ == "__main__":

# Suitability per LSOA
suitability_data = pd.read_parquet(suitablitity_per_lsoa_file)

# Filter data for a particular place

place_name = "Plymouth" # Word in the LSOA name
plymouth_lsoas_df = suitability_data[
suitability_data["lsoa_name"].apply(lambda x: place_name in str(x))
]
plymouth_lsoas_list = list(plymouth_lsoas_df["lsoa"].unique())

plymouth_lsoas_df = add_tenure_proportions(plymouth_lsoas_df)

# Add geospatial data
plymouth_lsoas_gdf = load_ew_boundaries(plymouth_lsoas_df)

# Features per property

per_prop_data = pl.read_parquet(
suitability_per_property_file,
columns=[
"UPRN",
"lsoa",
"LATITUDE",
"LONGITUDE",
"UPRN_duplicated",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be removed from the suitability per property file. Not sure why it's still in there, I thought I had fixed the issue.

"CURRENT_ENERGY_RATING",
"property_type",
"off_gas",
"garden_area_m2",
"ruc_two_fold",
"households_per_km2",
"heatpump_installation_percentage",
"has_anchor_property",
"tenure",
"in_protected_area",
"lad_conservation_area_data_available_ew",
"listed_building",
"build_year",
"ASHP_S_avg_score",
"ASHP_N_avg_score",
"GSHP_S_avg_score",
"GSHP_N_avg_score",
"SGL_S_avg_score",
"SGL_N_avg_score",
"HN_S_avg_score",
"HN_N_avg_score",
],
)

per_prop_plymouth = per_prop_data.filter(
pl.col("lsoa").is_in(plymouth_lsoas_list)
).to_pandas()

# Unique features per LSOA
lsoa_features = per_prop_plymouth.drop_duplicates(subset=["lsoa"])[
[
"lsoa",
"ruc_two_fold",
"has_anchor_property",
"heatpump_installation_percentage",
"households_per_km2",
]
]

# Join with lsoa features
plymouth_lsoas_gdf = plymouth_lsoas_gdf.merge(lsoa_features, how="left", on="lsoa")

flats_per_lsoa = get_lsoa_proportion_flats(per_prop_plymouth)
plymouth_lsoas_gdf = plymouth_lsoas_gdf.merge(
flats_per_lsoa[["lsoa", "%flats"]], how="left", on="lsoa"
)

# Alternative way to get proportion of flats from census
census_prop_type = get_target.load_transform_df_target_property_type_ew()
census_prop_type_plymouth = census_prop_type.filter(
pl.col("lsoa").is_in(plymouth_lsoas_list)
)
census_prop_type_plymouth = census_prop_type_plymouth.with_columns(
sum=pl.sum_horizontal(
"Detached",
"Semi-detached",
"Terraced (including end-terrace)",
"Flat, maisonette or apartment",
"Caravan or other mobile or temporary structure",
)
).with_columns(
(pl.col("Flat, maisonette or apartment") * 100 / pl.col("sum")).alias(
"census_%flats"
)
)
plymouth_lsoas_gdf = plymouth_lsoas_gdf.merge(
census_prop_type_plymouth.to_pandas()[["lsoa", "census_%flats"]],
how="left",
on="lsoa",
)
Comment on lines +223 to +240
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use these census proportions when it comes to using them, rather than proportions calculated from the EPC data.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to know! I think we did in the maps - but will make sure going forward


# Get categorisations of HN/ASHP zones

plymouth_lsoas_gdf = plymouth_lsoas_gdf.round(3).rename(columns=column_rename_dict)

quantile_thresh = 0.6
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for choosing 0.6?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeh it feels a bit random, it was because a more normal quantile like 0.75 gave no values for one of the categories, so I felt like I needed to lower it in order to get at least a few LSOAs in each category. Either way, we didn't actually use this categorisation in the mapping, so this could be deleted.

ashp_high_thresh = plymouth_lsoas_gdf["ASHP - Nesta"].quantile(quantile_thresh)
gshp_high_thresh = plymouth_lsoas_gdf["GSHP - Nesta"].quantile(quantile_thresh)
sgl_high_thresh = plymouth_lsoas_gdf["SGL - Nesta"].quantile(quantile_thresh)
hn_high_thresh = plymouth_lsoas_gdf["HN - Nesta"].quantile(quantile_thresh)

plymouth_lsoas_gdf["Overall suitability type - shared/not"] = (
plymouth_lsoas_gdf.apply(
lambda x: get_tech_suitability_overall_4_type(
x["ASHP - Nesta"],
x["GSHP - Nesta"],
x["SGL - Nesta"],
x["HN - Nesta"],
ashp_high_thresh,
gshp_high_thresh,
sgl_high_thresh,
hn_high_thresh,
),
axis=1,
)
)

plymouth_lsoas_gdf["Overall suitability type"] = plymouth_lsoas_gdf.apply(
lambda x: get_tech_suitability_overall_type(
x["ASHP - Nesta"], x["HN - Nesta"], ashp_high_thresh, hn_high_thresh
),
axis=1,
)

plymouth_lsoas_gdf["Manual suitability type"] = plymouth_lsoas_gdf.apply(
lambda x: get_tech_suitability_manual(
x["ASHP - Nesta"],
x["HN - Nesta"],
),
axis=1,
)

plymouth_lsoas_gdf.to_file(
"plymouth_lsoas_gdf_binary_suitability.geojson", driver="GeoJSON"
)
# smaller version without geometry
plymouth_lsoas_gdf.drop(["geometry"], axis=1).to_csv(
"plymouth_lsoas_gdf_binary_suitability.csv"
)

# -----

# Per property dataset (for plotting individual properties and their features)

plymouth_suit_properties = per_prop_data.filter(
pl.col("lsoa").is_in(plymouth_lsoas_list)
)
plymouth_uprn_list = list(plymouth_suit_properties["UPRN"].unique())

property_garden_size = pl.read_parquet(
garden_size_file,
)
plymouth_garden = property_garden_size.filter(
pl.col("UPRN").is_in(plymouth_uprn_list)
)

plymouth_per_prop_data_extra = plymouth_suit_properties.join(
plymouth_garden, on="UPRN", how="left"
)

plymouth_per_prop_data_extra = plymouth_per_prop_data_extra.rename(
{k.split("_weighted")[0]: v for k, v in column_rename_dict.items()}
)

plymouth_per_prop_data_extra = plymouth_per_prop_data_extra.with_columns(
pl.col(
[
"ASHP - Conventional",
"ASHP - Nesta",
"GSHP - Conventional",
"GSHP - Nesta",
"SGL - Conventional",
"SGL - Nesta",
"HN - Conventional",
"HN - Nesta",
"garden_area_m2",
]
).round(3)
)

plymouth_per_prop_data_extra.write_csv("plymouth_per_prop_data_extra.csv")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This path could be updated to save to the /outputs dir instead.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very true!