updates: move elevation statistics helpers into generic gridded-data aggregation

kavigupta · Feb 8, 2025 · 17aeae4
1 parent e5ccf24
commit 17aeae4
Show file tree

Hide file tree

Showing 3 changed files with 91 additions and 112 deletions.
diff --git a/urbanstats/data/aggregate_gridded_data.py b/urbanstats/data/aggregate_gridded_data.py
@@ -1,10 +1,17 @@
 from abc import ABC, abstractmethod
-from permacache import permacache
+from permacache import permacache, stable_hash
 import numpy as np
 import pandas as pd
+import shapely
+import tqdm.auto as tqdm
 
+from urbanstats.data.canada.canada_blocks import load_canada_db_shapefile
 from urbanstats.data.census_blocks import load_raw_census
-from urbanstats.geometry.census_aggregation import aggregate_by_census_block
+from urbanstats.data.gpw import compute_gpw_weighted_for_shape, load_full_ghs
+from urbanstats.geometry.census_aggregation import (
+    aggregate_by_census_block,
+    aggregate_by_census_block_canada,
+)
 
 
 class GriddedDataSource(ABC):
@@ -20,7 +27,39 @@ def load_gridded_data(self, resolution: int | str = "most_detailed"):
 
 
 @permacache(
-    "urbanstats/data/aggregated_gridded_data/elevation_statistics_for_american_shapefile",
+    "urbanstats/data/aggregate_gridded_data/statistics_for_shape",
+    key_function=dict(
+        shape=lambda x: stable_hash(shapely.to_geojson(x)),
+    ),
+)
+def statistics_for_shape(gridded_data_sources, shape):
+    return compute_gpw_weighted_for_shape(
+        shape,
+        load_full_ghs(),
+        {
+            k: (v.load_gridded_data(60 * 2), True)
+            for k, v in gridded_data_sources.items()
+        },
+        do_histograms=False,
+    )
+
+
+@permacache(
+    "urbanstats/data/aggregate_gridded_data/statistics_for_shapefile",
+    key_function=dict(shapefile=lambda x: x.hash_key),
+)
+def statistics_for_shapefile(gridded_data_sources, shapefile):
+    sf = shapefile.load_file()
+    result = {k: [] for k in gridded_data_sources}
+    for shape in tqdm.tqdm(sf.geometry):
+        stats, _ = statistics_for_shape(gridded_data_sources, shape)
+        for k, v in stats.items():
+            result[k].append(v)
+    return result
+
+
+@permacache(
+    "urbanstats/data/aggregate_gridded_data/statistics_for_american_shapefile",
     key_function=dict(sf=lambda x: x.hash_key),
 )
 def statistics_for_american_shapefile(gridded_data_sources, sf):
@@ -36,12 +75,41 @@ def statistics_for_american_shapefile(gridded_data_sources, sf):
     return result
 
 
+@permacache(
+    "urbanstats/data/aggregate_gridded_data/statistics_for_canada_shapefile",
+    key_function=dict(sf=lambda x: x.hash_key),
+)
+def statistics_for_canada_shapefile(gridded_data_sources, sf, year=2021):
+    canada_db = load_canada_db_shapefile(year)
+    stats_times_population = (
+        stats_by_canada_blocks(gridded_data_sources, year)
+        * np.array(canada_db.population)[:, None]
+    )
+    stats_times_population["population"] = canada_db.population
+    agg = aggregate_by_census_block_canada(
+        year,
+        sf,
+        stats_times_population,
+    )
+    for k in agg.columns[:-1]:
+        agg[k] = agg[k] / agg.population
+    del agg["population"]
+    return agg
+
+
 @permacache("urbanstats/data/aggregate_gridded_data/stats_by_blocks")
 def stats_by_blocks(gridded_data_sources, year):
     _, _, _, _, coordinates = load_raw_census(year)
     return disaggregate_both_to_blocks(gridded_data_sources, coordinates)
 
 
+@permacache("urbanstats/data/aggregate_gridded_data/stats_by_canada_blocks")
+def stats_by_canada_blocks(gridded_data_sources, year):
+    geos = load_canada_db_shapefile(year).geometry
+    coordinates = np.array([geos.y, geos.x]).T
+    return disaggregate_both_to_blocks(gridded_data_sources, coordinates)
+
+
 def disaggregate_both_to_blocks(gridded_data_sources, coordinates):
     return pd.DataFrame(
         {

diff --git a/urbanstats/data/elevation.py b/urbanstats/data/elevation.py
@@ -164,7 +164,7 @@ def create_full_image(function, chunk_reduction):
 from .aggregate_gridded_data import GriddedDataSource
 
 
-@dataclass
+@dataclass(frozen=True)
 class ElevationGriddedData(GriddedDataSource):
     @lru_cache(maxsize=None)
     def load_gridded_data(self, resolution: int | str = "most_detailed"):
@@ -174,7 +174,7 @@ def load_gridded_data(self, resolution: int | str = "most_detailed"):
         return create_full_image(aggregated_elevation, 2)
 
 
-@dataclass
+@dataclass(frozen=True)
 class HillinessGriddedData(GriddedDataSource):
     @lru_cache(maxsize=None)
     def load_gridded_data(self, resolution: int | str = "most_detailed"):
@@ -184,17 +184,10 @@ def load_gridded_data(self, resolution: int | str = "most_detailed"):
         return create_full_image(aggregated_hilliness, 2)
 
 
-# def disaggregate_to_blocks(function, coordinates):
-#     lat, lon = coordinates.T
-#     full_img = create_full_image(function, 1)
-#     by_block = look_up(full_img, lat, lon)
-#     return by_block
-
-
-# def disaggregate_both_to_blocks(coordinates):
-#     elevation = disaggregate_to_blocks(aggregated_elevation, coordinates)
-#     hilliness = disaggregate_to_blocks(aggregated_hilliness, coordinates)
-#     return pd.DataFrame(dict(elevation=elevation, hilliness=hilliness))
+elevation_gds = {
+    "gridded_hilliness": HillinessGriddedData(),
+    "gridded_elevation": ElevationGriddedData(),
+}
 
 
 @lru_cache(maxsize=1)
@@ -205,81 +198,3 @@ def full_elevation():
 @lru_cache(maxsize=1)
 def full_hilliness():
     return create_full_image(aggregated_hilliness, 2)
-
-
-# @permacache("urbanstats/data/elevation/stats_by_blocks")
-# def stats_by_blocks(year):
-#     _, _, _, _, coordinates = load_raw_census(year)
-#     return disaggregate_both_to_blocks(coordinates)
-
-
-@permacache("urbanstats/data/elevation/stats_by_canada_blocks_2")
-def stats_by_canada_blocks(year):
-    geos = load_canada_db_shapefile(year).geometry
-    coordinates = np.array([geos.y, geos.x]).T
-    return disaggregate_both_to_blocks(coordinates)
-
-
-# @permacache(
-#     "urbanstats/data/elevation/elevation_statistics_for_american_shapefile_2",
-#     key_function=dict(sf=lambda x: x.hash_key),
-# )
-# def elevation_statistics_for_american_shapefile(sf):
-#     _, population_2020, *_ = load_raw_census(2020)
-#     stats_times_population = stats_by_blocks(2020) * population_2020
-#     stats_times_population["population"] = population_2020[:, 0]
-#     result = aggregate_by_census_block(2020, sf, stats_times_population)
-#     for k in result.columns[:-1]:
-#         result[k] = result[k] / result.population
-#     del result["population"]
-#     return result
-
-
-@permacache(
-    "urbanstats/data/elevation/elevation_statistics_for_canada_shapefile",
-    key_function=dict(sf=lambda x: x.hash_key),
-)
-def elevation_statistics_for_canada_shapefile(sf, year=2021):
-    canada_db = load_canada_db_shapefile(year)
-    stats_times_population = (
-        stats_by_canada_blocks(year) * np.array(canada_db.population)[:, None]
-    )
-    stats_times_population["population"] = canada_db.population
-    agg = aggregate_by_census_block_canada(
-        year,
-        sf,
-        stats_times_population,
-    )
-    for k in agg.columns[:-1]:
-        agg[k] = agg[k] / agg.population
-    del agg["population"]
-    return agg
-
-
-@permacache(
-    "urbanstats/data/elevation/elevation_statistics_for_shape_2",
-    key_function=dict(
-        shape=lambda x: stable_hash(shapely.to_geojson(x)),
-    ),
-)
-def elevation_statistics_for_shape(shape):
-    return compute_gpw_weighted_for_shape(
-        shape,
-        load_full_ghs(),
-        {"elevation": (full_elevation(), True), "hilliness": (full_hilliness(), True)},
-        do_histograms=False,
-    )
-
-
-@permacache(
-    "urbanstats/data/elevation/elevation_statistics_for_shapefile_2",
-    key_function=dict(shapefile=lambda x: x.hash_key),
-)
-def elevation_statistics_for_shapefile(shapefile):
-    sf = shapefile.load_file()
-    result = {"elevation": [], "hilliness": []}
-    for shape in tqdm.tqdm(sf.geometry):
-        stats, _ = elevation_statistics_for_shape(shape)
-        result["elevation"].append(stats["elevation"])
-        result["hilliness"].append(stats["hilliness"])
-    return result
diff --git a/urbanstats/statistics/collections/elevation_hilliness.py b/urbanstats/statistics/collections/elevation_hilliness.py
@@ -1,9 +1,12 @@
 import numpy as np
 
+from urbanstats.data.aggregate_gridded_data import (
+    statistics_for_american_shapefile,
+    statistics_for_canada_shapefile,
+    statistics_for_shapefile,
+)
 from urbanstats.data.elevation import (
-    elevation_statistics_for_american_shapefile,
-    elevation_statistics_for_canada_shapefile,
-    elevation_statistics_for_shapefile,
+    elevation_gds,
 )
 from urbanstats.games.quiz_question_metadata import (
     ELEVATION,
@@ -85,24 +88,17 @@ def compute_statistics_dictionary(
     def compute_intl(self, shapefile):
         if "international_gridded_data" not in shapefile.special_data_sources:
             return {}
-        result = elevation_statistics_for_shapefile(shapefile)
-        return {
-            "gridded_hilliness": result["hilliness"],
-            "gridded_elevation": result["elevation"],
-        }
+        result = statistics_for_shapefile(elevation_gds, shapefile)
+        return result
 
     def compute_usa(self, *, shapefile, existing_statistics, shapefile_table):
         del existing_statistics, shapefile_table
-        table = elevation_statistics_for_american_shapefile(shapefile)
-        return {
-            "gridded_hilliness": table["hilliness"],
-            "gridded_elevation": table["elevation"],
-        }
+        table = statistics_for_american_shapefile(elevation_gds, shapefile)
+        return table
 
     def compute_canada(self, *, shapefile, existing_statistics, shapefile_table):
         del existing_statistics, shapefile_table
-        table = elevation_statistics_for_canada_shapefile(shapefile)
-        return {
-            "gridded_hilliness": table["hilliness"],
-            "gridded_elevation": table["elevation"],
-        }
+        table = statistics_for_canada_shapefile(elevation_gds, shapefile)
+        print(elevation_gds.keys())
+        print(table.keys())
+        return table