Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CIF-330 [METRIC] isochrone and isodistance metrics #87

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ca2596a
initial commit
tedw0ng Oct 24, 2024
03aa10c
change isochrone to isoline, address filename naming convention
tedw0ng Nov 20, 2024
1996fa6
interact with AWS
tedw0ng Nov 22, 2024
697c2bc
fix bug when district-by-district not necessary
tedw0ng Nov 22, 2024
300f866
fix test to pass dict instead of positional args
tedw0ng Jan 8, 2025
2ee7cb6
initial commit
tedw0ng Jan 9, 2025
690f0b1
initial commit
tedw0ng Jan 9, 2025
d683988
fixes after initial testing
tedw0ng Jan 9, 2025
3971168
add tests
tedw0ng Jan 10, 2025
a708857
Merge branch 'main' into percent-with-access-to-amenity-metric
tedw0ng Jan 10, 2025
7b4aace
fix bug in test
tedw0ng Jan 10, 2025
c09fd1a
bug fix again
tedw0ng Jan 10, 2025
e094141
add demographic-specific metrics
tedw0ng Jan 12, 2025
3e2615b
test bypass
tedw0ng Jan 12, 2025
df71d82
bug fix
tedw0ng Jan 12, 2025
96d556c
bug fix
tedw0ng Jan 12, 2025
58a64c8
distinguish populationmean and areamean metrics
tedw0ng Jan 13, 2025
ffc15ed
correct pop-weighted indicator calculation
tedw0ng Jan 13, 2025
8c56c2e
Merge branch 'main' into CIF-330-isochrone-and-isodistance-metrics
tedw0ng Jan 15, 2025
6a7f86c
clean up percent_population_access
tedw0ng Jan 15, 2025
c5889be
bug fix
tedw0ng Jan 15, 2025
e625c39
overhaul count_accessible_amenities metric
tedw0ng Jan 17, 2025
9be9c61
bug fixes
tedw0ng Jan 17, 2025
a65984e
bug fix for district with zero isoline polys
tedw0ng Jan 18, 2025
63536a4
Bug fix for single simple isoline polygon
tedw0ng Jan 18, 2025
161c71a
typo fix
tedw0ng Jan 18, 2025
7b6a0ad
clarifying comments
tedw0ng Jan 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions city_metrix/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .nasa_dem import NasaDEM
from .era_5_hottest_day import Era5HottestDay
from .impervious_surface import ImperviousSurface
from .isoline import Isoline
from .glad_lulc import LandCoverGlad
from .glad_lulc import LandCoverSimplifiedGlad
from .glad_lulc import LandCoverHabitatGlad
Expand Down
22 changes: 22 additions & 0 deletions city_metrix/layers/isoline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import geopandas as gpd
import shapely
import boto3

from .layer import Layer
from city_metrix.layers.layer import get_utm_zone_epsg

BUCKET_NAME = 'wri-cities-indicators'
PREFIX = 'data/isolines/'

class Isoline(Layer):
def __init__(self, filename, **kwargs):
super().__init__(**kwargs)
# Get data from S3
url = f'https://{BUCKET_NAME}.s3.us-east-1.amazonaws.com/{PREFIX}{filename}'
print(f'Attempting to retrieve isoline file from {url}', end=' ')
gdf = gpd.read_file(url)
print('(Succeeded)')
self.gdf = gpd.GeoDataFrame({'is_accessible': [1] * len(gdf), 'geometry': gdf.to_crs('EPSG:4326')['geometry']}).to_crs('EPSG:4326').set_crs('EPSG:4326').set_geometry('geometry')

def get_data(self, bbox):
return self.gdf.clip(shapely.box(*bbox))
1 change: 1 addition & 0 deletions city_metrix/layers/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ def write_layer(path, data):
data.to_file(path, driver="GeoJSON")
else:
raise NotImplementedError("Can only write DataArray, Dataset, or GeoDataFrame")
raise NotImplementedError("Can only write DataArray, Dataset, or GeoDataFrame")

def write_dataarray(path, data):
# for rasters, need to write to locally first then copy to cloud storage
Expand Down
6 changes: 5 additions & 1 deletion city_metrix/layers/urban_land_use.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ class UrbanLandUse(Layer):
spatial_resolution: raster resolution in meters (see https://github.com/stac-extensions/raster)
"""

def __init__(self, band='lulc', spatial_resolution=5, **kwargs):
def __init__(self, band='lulc', return_value=None, spatial_resolution=5, **kwargs):
super().__init__(**kwargs)
self.band = band
self.spatial_resolution = spatial_resolution
self.return_value = return_value

def get_data(self, bbox):
ulu = ee.ImageCollection("projects/wri-datalab/cities/urban_land_use/V1")
Expand All @@ -43,4 +44,7 @@ def get_data(self, bbox):
"urban land use"
).lulc

if self.return_value is not None:
data = data.where(data==self.return_value)

return data
2 changes: 2 additions & 0 deletions city_metrix/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from .urban_open_space import urban_open_space
from .natural_areas import natural_areas
from .era_5_met_preprocessing import era_5_met_preprocessing
from .percent_population_access import percent_population_access#, percent_population_access_elderly, percent_population_access_children, percent_population_access_female, percent_population_access_informal
from .count_accessible_amenities import count_accessible_amenities#_areamean, count_accessible_amenities_populationmean
from .recreational_space_per_capita import recreational_space_per_capita
from .vegetation_water_change import vegetation_water_change_gain_area
from .vegetation_water_change import vegetation_water_change_loss_area
Expand Down
137 changes: 137 additions & 0 deletions city_metrix/metrics/count_accessible_amenities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from geopandas import GeoDataFrame, GeoSeries
from pandas import Series
from city_metrix.layers import Layer
from math import floor
import shapely

from city_metrix.layers import Isoline, UrbanLandUse, WorldPop
from city_metrix.layers.layer import get_utm_zone_epsg


def count_accessible_amenities(zones: GeoDataFrame, city_name, amenity_name, travel_mode, threshold_type, threshold_value, retrieval_date, weighting='population', worldpop_agesex_classes=[], worldpop_year=2020, informal_only=False) -> GeoSeries:
# cityname example: ARG-Buenos-Aires
# amenityname is OSMclass names, in lowercase
# travelmode is walk, bike, automobile, publictransit (only walk implemented for now)
# threshold_type is distance or time
# threshold_value is integer, in minutes or meters

class AccessCountTmp(Layer):
def __init__(self, access_gdf, return_value, **kwargs):
super().__init__(**kwargs)
self.gdf = access_gdf[access_gdf['count_amenities']==return_value]
def get_data(self, bbox):
return self.gdf.clip(shapely.box(*bbox))

class IsolinesBuffered(Isoline):
# Stores and returns isochrone or isodistance polygons with polygons slightly buffered and simplified to avoid invalid-geometry errors
def __init__(self, filename, **kwargs):
super().__init__(filename=filename)
# params is dict with keys cityname, amenityname, travelmode, threshold_type, threshold_value, year
iso_gdf = self.gdf
if iso_gdf.crs in ('EPSG:4326', 'epsg:4326'):
utm_epsg = get_utm_zone_epsg(iso_gdf.total_bounds)
iso_gdf = iso_gdf.to_crs(utm_epsg).set_crs(utm_epsg)
poly_list = [shapely.simplify(p.buffer(0.1), tolerance=10) for p in iso_gdf.geometry] # Buffer and simplify
buffered_gdf = GeoDataFrame({'accessible': 1, 'geometry': poly_list}).set_crs(utm_epsg)
self.gdf = buffered_gdf.to_crs('EPSG:4326').set_crs('EPSG:4326')

def get_data(self, bbox):
return self.gdf.clip(shapely.box(*bbox))

filename = f"{city_name}_{amenity_name}_{travel_mode}_{threshold_type}_{threshold_value}_{retrieval_date}.geojson"

population_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year)
if informal_only:
informal_layer = UrbanLandUse(return_value=3)
population_layer.masks.append(informal_layer)

filename = f"{city_name}_{amenity_name}_{travel_mode}_{threshold_type}_{threshold_value}_{retrieval_date}.geojson"
iso_layer = IsolinesBuffered(filename=filename)
results = []
for rownum in range(len(zones)):
zone = zones.iloc[[rownum]]
print('\n{0} (geo {1} of {2})'.format(zone.geo_name[rownum], rownum+1, len(zones)))
iso_gdf = iso_layer.get_data(zone.total_bounds)
if len(iso_gdf) == 0:
results.append(0)
else:
if len(iso_gdf) == 1 and type(iso_gdf.iloc[0].geometry) == shapely.geometry.polygon.Polygon: # only one simple-polygon isoline
dissected_gdf = GeoDataFrame({'poly_id': [0], 'geometry': [iso_gdf.iloc[0].geometry]}).set_crs('EPSG:4326')
else:
# Collect individual boundary linestrings of each isoline polygon
iso_eachboundary = [iso_gdf.iloc[rownum]['geometry'].boundary for rownum in range(len(iso_gdf))]
iso_boundarylinesmulti = [i for i in iso_eachboundary if i is not None]
iso_boundarylines = []
for i in iso_boundarylinesmulti:
if type(i) == shapely.geometry.linestring.LineString:
iso_boundarylines.append(i)
else:
for j in list(i.geoms):
iso_boundarylines.append(j)
iso_bl_gdf = GeoDataFrame({'idx': range(len(iso_boundarylines)), 'geometry': iso_boundarylines})
print("Converted isoline polygons into boundary multilines")
# Dissolve all linestrings into large multilinestring, and polygonize into "dissected polygons"

iso_dissected_polys = shapely.polygonize(list(iso_bl_gdf.dissolve().geometry[0].geoms))
print("Creating gdf of dissected polygons")
dissected_gdf = GeoDataFrame({'poly_id': range(len(list(iso_dissected_polys.geoms))), 'geometry': list(iso_dissected_polys.geoms)}).set_crs('EPSG:4326')
# For each dissected polygon, find how many of the original isoline polys contain the centroid
# This is the number of amenity points are accessible within the dissected polygon
print("Counting original isoline polygons that intersect with each dissected polygon")
count_amenities = dissected_gdf.centroid.within(iso_gdf.iloc[[0]].geometry[iso_gdf.index[0]]) * 1 # This is a Series storing running sum, a vector with num rows == num of dissected polys
print("|==" + "PROGRESS=OF=INCLUSION=TESTS" + ("=" * 71) + "|")
print(" ", end="")
progress = 0

for iso_idx in range(1, len(iso_gdf)): # Iterate through all original isoline polys: test dissected polys to see whether centroids are included in isoline poly, add to running sum Series
if floor(100 * iso_idx / len(iso_gdf)) > progress:
progress = floor(100 * iso_idx / len(iso_gdf))
print("X", end="")
count_amenities = count_amenities + (dissected_gdf.centroid.within(iso_gdf.iloc[[iso_idx]].geometry[iso_gdf.index[iso_idx]]) * 1)
print("X")
dissected_gdf['count_amenities'] = count_amenities

# Create dict of polygons each with a single asset-count
max_count = dissected_gdf['count_amenities'].max()
count_layers = {count: AccessCountTmp(access_gdf=dissected_gdf, return_value=count) for count in range(1, max_count + 1)}

# For each zone, find average number of accessible amenities, and store in result_gdf

running_sum = Series([0])
for count in range(1, max_count+1):
try: # Because adding masks to pop_layer adds them to WorldPop(), and they cannot be removed from WorldPop()
pop_layer
except NameError:
pop_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year)
else:
pop_layer.masks = []
try:
totalpop_layer
except NameError:
totalpop_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year)
else:
totalpop_layer.masks = []
if informal_only:
pop_layer.masks.append(informal_layer)
totalpop_layer.masks.append(informal_layer)
pop_layer.masks.append(count_layers[count])
groupby = pop_layer.groupby(zone)
if weighting == 'area':
try:
running_sum += count * groupby.count().fillna(0)
except:
running_sum += Series([0])
else: # weighting == 'population'
try:
running_sum += count * groupby.sum().fillna(0)
except:
running_sum += Series([0])
if weighting == 'area':
rowresult = running_sum / totalpop_layer.groupby(zone).count()
else: # weighting == 'population'
rowresult = running_sum / totalpop_layer.groupby(zone).sum()

results.append(rowresult[0])

result_gdf = GeoDataFrame({f'{weighting}_averaged_num_accessible_amenities': results, 'geometry': zones['geometry']}).set_crs('EPSG:4326')
return result_gdf
45 changes: 45 additions & 0 deletions city_metrix/metrics/percent_population_access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from geopandas import GeoDataFrame, GeoSeries
import shapely
import numpy as np

from city_metrix.layers.isoline import Isoline
from city_metrix.layers.world_pop import WorldPop
from city_metrix.layers.urban_land_use import UrbanLandUse
from city_metrix.layers.layer import Layer, get_utm_zone_epsg


def percent_population_access(zones, city_name, amenity_name, travel_mode, threshold_type, threshold_value, retrieval_date, worldpop_agesex_classes=[], worldpop_year=2020, informal_only=False):
# Example: city_name='MEX-Mexico-City', amenity_name='schools', travel_mode='walk', threshold_type='time', threshold_value=30, retrieval_date='20241105'

class IsolineSimplified(Isoline):
# Stores and returns isochrone or isodistance polygons with simplified geometry, and dissolved into fewer non-overlapping polygons
def __init__(self, filename, **kwargs):
super().__init__(filename=filename)
# params is dict with keys cityname, amenityname, travelmode, threshold_type, threshold_value, year
iso_gdf = self.gdf
if iso_gdf.crs in ('EPSG:4326', 'epsg:4326'):
utm_epsg = get_utm_zone_epsg(iso_gdf.total_bounds)
iso_gdf = iso_gdf.to_crs(utm_epsg).set_crs(utm_epsg)
poly_list = [shapely.simplify(p.buffer(0.1), tolerance=10) for p in iso_gdf.geometry] # Buffer and simplify
uu = shapely.unary_union(shapely.MultiPolygon(poly_list)) # Dissolve
shorter_gdf = GeoDataFrame({'accessible': 1, 'geometry': list(uu.geoms)}).set_crs(utm_epsg)
self.gdf = shorter_gdf.to_crs('EPSG:4326').set_crs('EPSG:4326')

def get_data(self, bbox):
return self.gdf.clip(shapely.box(*bbox))
filename = f"{city_name}_{amenity_name}_{travel_mode}_{threshold_type}_{threshold_value}_{retrieval_date}.geojson"
iso_layer = IsolineSimplified(filename)
accesspop_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year, masks=[iso_layer,])
totalpop_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year)
if informal_only:
informal_layer = UrbanLandUse(return_value=3)
accesspop_layer.masks.append(informal_layer)
totalpop_layer.masks.append(informal_layer)
res = []
for rownum in range(len(zones)): # Doing it district-by-district to avoid empty-GDF error
try:
res.append(100 * accesspop_layer.groupby(zones.iloc[[rownum]]).sum()[0] / totalpop_layer.groupby(zones.iloc[[rownum]]).sum()[0])
except:
res.append(0)
result_gdf = GeoDataFrame({'access_percent': res, 'geometry': zones.geometry}).set_crs(zones.crs)
return result_gdf
7 changes: 7 additions & 0 deletions tests/test_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
EsaWorldCoverClass,
HighLandSurfaceTemperature,
ImperviousSurface,
Isoline,
LandCoverGlad,
LandCoverHabitatGlad,
LandCoverHabitatChangeGlad,
Expand Down Expand Up @@ -78,6 +79,12 @@ def test_impervious_surface():
data = ImperviousSurface().get_data(BBOX)
assert np.size(data) > 0

@pytest.mark.skipif(EXECUTE_IGNORED_TESTS == False, reason="AWS redentials needed")
def test_isoline():
layer = Isoline({'cityname': 'KEN-Nairobi', 'amenityname': 'schools', 'travelmode': 'walk', 'threshold_type': 'time', 'threshold_value': '15', 'year': 2023})
nairobi_bbox = (36.66446402, -1.44560888, 37.10497899, -1.16058296)
data = layer.get_data(nairobi_bbox)

def test_land_cover_glad():
data = LandCoverGlad().get_data(BBOX)
assert np.size(data) > 0
Expand Down
16 changes: 16 additions & 0 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest



def test_built_land_with_high_lst():
indicator = built_land_with_high_land_surface_temperature(ZONES)
expected_zone_size = ZONES.geometry.size
Expand All @@ -23,6 +24,13 @@ def test_built_land_without_tree_cover():
actual_indicator_size = indicator.size
assert expected_zone_size == actual_indicator_size

@pytest.mark.skipif(EXECUTE_IGNORED_TESTS == False, reason="Needs specific zone file to run")
def test_count_accessible_amenities():
nairobi_gdf = None # Need link to a GDF for which the isoline file has been calculated
indicator = count_accessible_amenities(nairobi_gdf, 'KEN-Nairobi', 'schools', 'walk', 'time', 15, '20241105', weighting='population')
expected_zone_size = NAIROBI_BBOX.geometry.size
actual_indicator_size = indicator.size
assert expected_zone_size == actual_indicator_size

@pytest.mark.skipif(EXECUTE_IGNORED_TESTS == False, reason="CDS API needs personal access token file to run")
def test_era_5_met_preprocess():
Expand All @@ -43,6 +51,14 @@ def test_natural_areas():
actual_indicator_size = indicator.size
assert expected_zone_size == actual_indicator_size

@pytest.mark.skipif(EXECUTE_IGNORED_TESTS == False, reason="AWS credentials needed")
def test_percent_population_access():
from .conftest import create_fishnet_grid
NAIROBI_BBOX = create_fishnet_grid(36.66446402, -1.44560888, 37.10497899, -1.16058296, 0.01).reset_index()
indicator = percent_population_access(NAIROBI_BBOX, 'KEN-Nairobi', 'schools', 'walk', 'time', '15', 2024, aws_profilename=None)
expected_zone_size = NAIROBI_BBOX.geometry.size
actual_indicator_size = indicator.size
assert expected_zone_size == actual_indicator_size

def test_recreational_space_per_capita():
indicator = recreational_space_per_capita(ZONES)
Expand Down
Loading