Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
kavigupta committed Feb 8, 2025
1 parent e5ccf24 commit 17aeae4
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 112 deletions.
74 changes: 71 additions & 3 deletions urbanstats/data/aggregate_gridded_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
from abc import ABC, abstractmethod
from permacache import permacache
from permacache import permacache, stable_hash
import numpy as np
import pandas as pd
import shapely
import tqdm.auto as tqdm

from urbanstats.data.canada.canada_blocks import load_canada_db_shapefile
from urbanstats.data.census_blocks import load_raw_census
from urbanstats.geometry.census_aggregation import aggregate_by_census_block
from urbanstats.data.gpw import compute_gpw_weighted_for_shape, load_full_ghs
from urbanstats.geometry.census_aggregation import (
aggregate_by_census_block,
aggregate_by_census_block_canada,
)


class GriddedDataSource(ABC):
Expand All @@ -20,7 +27,39 @@ def load_gridded_data(self, resolution: int | str = "most_detailed"):


@permacache(
"urbanstats/data/aggregated_gridded_data/elevation_statistics_for_american_shapefile",
"urbanstats/data/aggregate_gridded_data/statistics_for_shape",
key_function=dict(
shape=lambda x: stable_hash(shapely.to_geojson(x)),
),
)
def statistics_for_shape(gridded_data_sources, shape):
    """
    Compute statistics for a single shape from a set of gridded data sources.

    :param gridded_data_sources: mapping from statistic name to an object
        exposing ``load_gridded_data(resolution)``.
    :param shape: the geometry to compute statistics over.
    :return: whatever ``compute_gpw_weighted_for_shape`` returns. The caller in
        this module unpacks it as a 2-tuple whose first element is a mapping
        of per-source statistics.
    """
    ghs = load_full_ghs()
    # Every source is loaded at the same resolution argument (60 * 2).
    resolution = 60 * 2
    layers = {}
    for name, source in gridded_data_sources.items():
        # NOTE(review): the meaning of the ``True`` flag is defined by
        # compute_gpw_weighted_for_shape and is not visible here -- confirm.
        layers[name] = (source.load_gridded_data(resolution), True)
    return compute_gpw_weighted_for_shape(
        shape,
        ghs,
        layers,
        do_histograms=False,
    )


@permacache(
    "urbanstats/data/aggregate_gridded_data/statistics_for_shapefile",
    key_function={"shapefile": lambda x: x.hash_key},
)
def statistics_for_shapefile(gridded_data_sources, shapefile):
    """
    Compute per-shape statistics for every geometry in a shapefile.

    The result is cached by permacache; the shapefile is keyed by its
    ``hash_key`` attribute.

    :param gridded_data_sources: mapping from statistic name to gridded data
        source; its keys become the keys of the result.
    :param shapefile: object exposing ``load_file()`` (returning something
        with a ``geometry`` attribute) and ``hash_key``.
    :return: dict mapping each statistic name to a list with one value per
        geometry, in the order of the geometries.
    """
    geometries = shapefile.load_file().geometry
    per_source = {name: [] for name in gridded_data_sources}
    for geometry in tqdm.tqdm(geometries):
        # The second element of the per-shape result is not used here.
        shape_stats, _ = statistics_for_shape(gridded_data_sources, geometry)
        for name, value in shape_stats.items():
            per_source[name].append(value)
    return per_source


@permacache(
"urbanstats/data/aggregate_gridded_data/statistics_for_american_shapefile",
key_function=dict(sf=lambda x: x.hash_key),
)
def statistics_for_american_shapefile(gridded_data_sources, sf):
Expand All @@ -36,12 +75,41 @@ def statistics_for_american_shapefile(gridded_data_sources, sf):
return result


@permacache(
    "urbanstats/data/aggregate_gridded_data/statistics_for_canada_shapefile",
    key_function={"sf": lambda x: x.hash_key},
)
def statistics_for_canada_shapefile(gridded_data_sources, sf, year=2021):
    """
    Aggregate block-level gridded statistics to the regions of a Canadian
    shapefile.

    Each block's statistics are multiplied by the block's population,
    aggregated with ``aggregate_by_census_block_canada``, and then every
    statistic column is divided by the aggregated ``population`` column.
    (Presumably the aggregation sums over blocks, making the result a
    population-weighted mean -- confirm against the aggregation helper.)

    :param gridded_data_sources: mapping from statistic name to gridded data
        source.
    :param sf: shapefile object; cached by its ``hash_key``.
    :param year: year passed to the Canadian block loader and the aggregator.
    :return: the aggregated table with the ``population`` column removed.
    """
    canada_db = load_canada_db_shapefile(year)
    block_stats = stats_by_canada_blocks(gridded_data_sources, year)
    # Column vector so the population multiplies every statistic column.
    population_column = np.array(canada_db.population)[:, None]
    weighted = block_stats * population_column
    weighted["population"] = canada_db.population
    agg = aggregate_by_census_block_canada(year, sf, weighted)
    # ``population`` was added last, so every column but the last is a
    # statistic that needs normalising.
    for column in agg.columns[:-1]:
        agg[column] = agg[column] / agg.population
    del agg["population"]
    return agg


@permacache("urbanstats/data/aggregate_gridded_data/stats_by_blocks")
def stats_by_blocks(gridded_data_sources, year):
    """
    Look up the gridded statistics at each census block for the given year.

    :param gridded_data_sources: mapping from statistic name to gridded data
        source.
    :param year: year passed to ``load_raw_census``.
    :return: the result of ``disaggregate_both_to_blocks`` on the block
        coordinates.
    """
    # Only the fifth element returned by load_raw_census (the coordinates)
    # is needed.
    _, _, _, _, block_coordinates = load_raw_census(year)
    return disaggregate_both_to_blocks(gridded_data_sources, block_coordinates)


@permacache("urbanstats/data/aggregate_gridded_data/stats_by_canada_blocks")
def stats_by_canada_blocks(gridded_data_sources, year):
    """
    Look up the gridded statistics at each Canadian block for the given year.

    :param gridded_data_sources: mapping from statistic name to gridded data
        source.
    :param year: year passed to ``load_canada_db_shapefile``.
    :return: the result of ``disaggregate_both_to_blocks`` on the block
        coordinates.
    """
    block_points = load_canada_db_shapefile(year).geometry
    # One row per block, ordered (y, x).
    y_then_x = np.array([block_points.y, block_points.x])
    return disaggregate_both_to_blocks(gridded_data_sources, y_then_x.T)


def disaggregate_both_to_blocks(gridded_data_sources, coordinates):
return pd.DataFrame(
{
Expand Down
97 changes: 6 additions & 91 deletions urbanstats/data/elevation.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def create_full_image(function, chunk_reduction):
from .aggregate_gridded_data import GriddedDataSource


@dataclass
@dataclass(frozen=True)
class ElevationGriddedData(GriddedDataSource):
@lru_cache(maxsize=None)
def load_gridded_data(self, resolution: int | str = "most_detailed"):
Expand All @@ -174,7 +174,7 @@ def load_gridded_data(self, resolution: int | str = "most_detailed"):
return create_full_image(aggregated_elevation, 2)


@dataclass
@dataclass(frozen=True)
class HillinessGriddedData(GriddedDataSource):
@lru_cache(maxsize=None)
def load_gridded_data(self, resolution: int | str = "most_detailed"):
Expand All @@ -184,17 +184,10 @@ def load_gridded_data(self, resolution: int | str = "most_detailed"):
return create_full_image(aggregated_hilliness, 2)


# def disaggregate_to_blocks(function, coordinates):
# lat, lon = coordinates.T
# full_img = create_full_image(function, 1)
# by_block = look_up(full_img, lat, lon)
# return by_block


# def disaggregate_both_to_blocks(coordinates):
# elevation = disaggregate_to_blocks(aggregated_elevation, coordinates)
# hilliness = disaggregate_to_blocks(aggregated_hilliness, coordinates)
# return pd.DataFrame(dict(elevation=elevation, hilliness=hilliness))
# Gridded data sources for the elevation statistics, keyed by the name of the
# statistic each one produces.
elevation_gds = dict(
    gridded_hilliness=HillinessGriddedData(),
    gridded_elevation=ElevationGriddedData(),
)


@lru_cache(maxsize=1)
Expand All @@ -205,81 +198,3 @@ def full_elevation():
@lru_cache(maxsize=1)
def full_hilliness():
    """
    Build the full hilliness image with a chunk reduction of 2.

    The result is memoised by ``lru_cache``, so the image is built on the
    first call and reused afterwards.
    """
    hilliness_image = create_full_image(aggregated_hilliness, 2)
    return hilliness_image


# @permacache("urbanstats/data/elevation/stats_by_blocks")
# def stats_by_blocks(year):
# _, _, _, _, coordinates = load_raw_census(year)
# return disaggregate_both_to_blocks(coordinates)


@permacache("urbanstats/data/elevation/stats_by_canada_blocks_2")
def stats_by_canada_blocks(year):
    """
    Look up elevation statistics at each Canadian block for the given year.

    :param year: year passed to ``load_canada_db_shapefile``.
    :return: the result of ``disaggregate_both_to_blocks`` on the block
        coordinates.
    """
    block_points = load_canada_db_shapefile(year).geometry
    # One row per block, ordered (y, x).
    y_then_x = np.array([block_points.y, block_points.x])
    return disaggregate_both_to_blocks(y_then_x.T)


# @permacache(
# "urbanstats/data/elevation/elevation_statistics_for_american_shapefile_2",
# key_function=dict(sf=lambda x: x.hash_key),
# )
# def elevation_statistics_for_american_shapefile(sf):
# _, population_2020, *_ = load_raw_census(2020)
# stats_times_population = stats_by_blocks(2020) * population_2020
# stats_times_population["population"] = population_2020[:, 0]
# result = aggregate_by_census_block(2020, sf, stats_times_population)
# for k in result.columns[:-1]:
# result[k] = result[k] / result.population
# del result["population"]
# return result


@permacache(
    "urbanstats/data/elevation/elevation_statistics_for_canada_shapefile",
    key_function={"sf": lambda x: x.hash_key},
)
def elevation_statistics_for_canada_shapefile(sf, year=2021):
    """
    Aggregate block-level elevation statistics to the regions of a Canadian
    shapefile.

    Block statistics are multiplied by block population, aggregated with
    ``aggregate_by_census_block_canada``, and each statistic column is then
    divided by the aggregated ``population`` column.

    :param sf: shapefile object; cached by its ``hash_key``.
    :param year: year passed to the Canadian block loader and the aggregator.
    :return: the aggregated table with the ``population`` column removed.
    """
    canada_db = load_canada_db_shapefile(year)
    block_stats = stats_by_canada_blocks(year)
    # Column vector so the population multiplies every statistic column.
    population_column = np.array(canada_db.population)[:, None]
    weighted = block_stats * population_column
    weighted["population"] = canada_db.population
    agg = aggregate_by_census_block_canada(year, sf, weighted)
    # ``population`` was added last, so every column but the last is a
    # statistic that needs normalising.
    for column in agg.columns[:-1]:
        agg[column] = agg[column] / agg.population
    del agg["population"]
    return agg


@permacache(
    "urbanstats/data/elevation/elevation_statistics_for_shape_2",
    key_function={
        "shape": lambda x: stable_hash(shapely.to_geojson(x)),
    },
)
def elevation_statistics_for_shape(shape):
    """
    Compute elevation and hilliness statistics for a single shape.

    The result is cached by permacache, keyed on a stable hash of the shape's
    GeoJSON representation.

    :param shape: the geometry to compute statistics over.
    :return: whatever ``compute_gpw_weighted_for_shape`` returns for the
        ``"elevation"`` and ``"hilliness"`` layers, with histograms disabled.
    """
    ghs = load_full_ghs()
    # NOTE(review): the meaning of the ``True`` flag is defined by
    # compute_gpw_weighted_for_shape and is not visible here -- confirm.
    layers = {
        "elevation": (full_elevation(), True),
        "hilliness": (full_hilliness(), True),
    }
    return compute_gpw_weighted_for_shape(
        shape,
        ghs,
        layers,
        do_histograms=False,
    )


@permacache(
    "urbanstats/data/elevation/elevation_statistics_for_shapefile_2",
    key_function={"shapefile": lambda x: x.hash_key},
)
def elevation_statistics_for_shapefile(shapefile):
    """
    Compute elevation and hilliness statistics for every geometry in a
    shapefile.

    :param shapefile: object exposing ``load_file()`` (returning something
        with a ``geometry`` attribute) and ``hash_key`` (used as cache key).
    :return: dict with keys ``"elevation"`` and ``"hilliness"``, each a list
        holding one value per geometry, in the order of the geometries.
    """
    geometries = shapefile.load_file().geometry
    elevations = []
    hillinesses = []
    for geometry in tqdm.tqdm(geometries):
        # The second element of the per-shape result is not used here.
        shape_stats, _ = elevation_statistics_for_shape(geometry)
        elevations.append(shape_stats["elevation"])
        hillinesses.append(shape_stats["hilliness"])
    return {"elevation": elevations, "hilliness": hillinesses}
32 changes: 14 additions & 18 deletions urbanstats/statistics/collections/elevation_hilliness.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import numpy as np

from urbanstats.data.aggregate_gridded_data import (
statistics_for_american_shapefile,
statistics_for_canada_shapefile,
statistics_for_shapefile,
)
from urbanstats.data.elevation import (
elevation_statistics_for_american_shapefile,
elevation_statistics_for_canada_shapefile,
elevation_statistics_for_shapefile,
elevation_gds,
)
from urbanstats.games.quiz_question_metadata import (
ELEVATION,
Expand Down Expand Up @@ -85,24 +88,17 @@ def compute_statistics_dictionary(
def compute_intl(self, shapefile):
if "international_gridded_data" not in shapefile.special_data_sources:
return {}
result = elevation_statistics_for_shapefile(shapefile)
return {
"gridded_hilliness": result["hilliness"],
"gridded_elevation": result["elevation"],
}
result = statistics_for_shapefile(elevation_gds, shapefile)
return result

def compute_usa(self, *, shapefile, existing_statistics, shapefile_table):
del existing_statistics, shapefile_table
table = elevation_statistics_for_american_shapefile(shapefile)
return {
"gridded_hilliness": table["hilliness"],
"gridded_elevation": table["elevation"],
}
table = statistics_for_american_shapefile(elevation_gds, shapefile)
return table

def compute_canada(self, *, shapefile, existing_statistics, shapefile_table):
del existing_statistics, shapefile_table
table = elevation_statistics_for_canada_shapefile(shapefile)
return {
"gridded_hilliness": table["hilliness"],
"gridded_elevation": table["elevation"],
}
table = statistics_for_canada_shapefile(elevation_gds, shapefile)
print(elevation_gds.keys())
print(table.keys())
return table

0 comments on commit 17aeae4

Please sign in to comment.