-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02_vectorisation.py
356 lines (272 loc) · 13.8 KB
/
02_vectorisation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
# ==================================================================================================================
# DISSERTATION
# SECTION 01C: Python replacement of QGIS operations
# This script is designed to replace the QGIS internal operations used for the initial project method.
# Author: J Post
# ==================================================================================================================
# 0. IMPORT PACKAGES
import os
import glob
import subprocess
import sys
import json
import time
import pandas as pd
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from osgeo import gdal
import rasterstats as rs
from rasterstats import zonal_stats
import json
import rasterio
from rasterio.merge import merge
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
from rasterio.plot import show
from rasterio.features import shapes
from rasterio.crs import CRS
from shapely.geometry import shape
from shapely.geometry import Point
from shapely.geometry import box
# from pygeos import Geometry
from globals import * # Imports the filepaths defined in globals.py
print('Packages imported.\n')
# Start timer
start_time = time.time()
# ==================================================================================================================
# 2. QGIS PROCESSES: GHSL
time_ghsl = time.time()
# ===========
# 2.1 Merge GHSL inputs into single raster covering all of India
time_21s = time.time()
if not os.path.isfile(ghsl_merged):
src_files_to_merge = [] # initialise empty list
for file in ghsl_to_merge:
src = rasterio.open(file)
src_files_to_merge.append(src)
merged, out_trans = merge(src_files_to_merge)
out_meta = src.meta.copy()
out_meta.update({"driver": "GTiff",
"height": merged.shape[1],
"width": merged.shape[2],
"transform": out_trans})
if not os.path.isfile(ghsl_merged):
with rasterio.open(ghsl_merged # output filepath
, "w" # = overwrite existing files
, **out_meta # set the file metadata
) as dest:
dest.write(merged)
print('GHSL inputs merged into a single raster file.\n')
timestamp(time_21s)
# # ===========
# 2.2 Convert the CRS of merged GHSL file
time_22s = time.time()
if not os.path.isfile(ghsl_merged_wgs84):
dst_crs = 'EPSG:4326'
with rasterio.open(ghsl_merged) as src:
transform, width, height = calculate_default_transform(
src.crs, dst_crs, src.width, src.height, *src.bounds)
kwargs = src.meta.copy()
kwargs.update({
'crs': dst_crs,
'transform': transform,
'width': width,
'height': height
})
with rasterio.open(ghsl_merged_wgs84, 'w', **kwargs) as dst:
for i in range(1, src.count + 1):
reproject(
source=rasterio.band(src, i),
destination=rasterio.band(dst, i),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=transform,
dst_crs=dst_crs,
resampling=Resampling.nearest)
print('GHSL raster converted to CRS EPSG:4326.\n')
timestamp(time_22s)
# ===========
# 2.3 Clip to specified state boundary
time_23s = time.time()
# Read in vector boundaries
districts_shp = gpd.read_file(districts_filepath)
if not os.path.isfile(ghsl_clipped):
# Read in GHSL raster to be clipped
with rasterio.open(ghsl_merged_wgs84) as src:
raster_meta = src.meta
raster_crs = src.crs
# Clip the raster using 'mask' function
clipped_raster, out_transform = mask(dataset=src
, shapes=districts_shp.geometry
, crop=True # crops the raster to the mask geometry
, nodata=0 # sets the value for pixels outside the vector boundaries
, all_touched=True # decides whether to consider all pixels that touch the vector features
)
# Update the metadata of clipped file
clipped_meta = raster_meta.copy()
clipped_meta.update({
'height': clipped_raster.shape[1],
'width': clipped_raster.shape[2],
'transform': out_transform,
'crs': raster_crs
})
# Export clipped raster to .tif
with rasterio.open(ghsl_clipped, 'w', **clipped_meta) as dst:
dst.write(clipped_raster)
print('GHSL raster clipped to state boundaries and exported as .tif.\n')
timestamp(time_23s)
# ===========
# 2.5 Vectorise the GHSL raster layer
if not os.path.isfile(ghsl_poly_dissolved):
time_24s = time.time()
# Read in GHSL raster
with rasterio.open(ghsl_clipped) as src:
raster_data = src.read(1).astype(np.float32) # use 'astype' to ensure values are in a format that can be used by shapely
transform = src.transform # Get the transformation matrix to convert pixel coordinates to geographic coordinates
raster_crs = src.crs
# Filter out the target class values during shape generation
target_classes = [11, 12, 13, 21]
vector_features = (shape(geom).buffer(0) for geom, val in shapes(raster_data, transform=transform) if val in target_classes)
# Create a GeoDataFrame directly from the shapes iterator
gdf_ghsl = gpd.GeoDataFrame({'geometry': vector_features}, crs=raster_crs)
gdf_ghsl['geometry'] = gdf_ghsl['geometry'].apply(lambda geom: shape(geom).buffer(0)) # Fix the geometries in GeoDataFrame (resolves self-intersections, overlapping polygons, etc.)
print('GHSL raster vectorised.\n')
timestamp(time_24s)
# Dissolve geometries into a single feature
time_25s = time.time()
gdf_ghsl['dissolve_id'] = 1 # Create a new column with a constant value (ensures all dissolved into a single feature)
dissolved_gdf_ghsl = gdf_ghsl.dissolve(by='dissolve_id', as_index=False)
dissolved_gdf_ghsl.drop(columns='dissolve_id', inplace=True) # Remove the 'dissolve_id' column (optional)
print('GHSL vector file dissolved into single feature.\n')
dissolved_gdf_ghsl.to_feather(ghsl_poly_dissolved)
print(f'GHSL vector file exported to {sfmt}.\n')
timestamp(time_25s)
print('GHSL processing complete.')
timestamp(time_ghsl)
# dissolved_gdf_ghsl.plot()
# plt.show()
# ==================================================================================================================
# 3. Agricultural Lands (Dynamic World)
time_cropland = time.time()
# ===========
# 3.1 Vectorise the DynamicWorld raster layer
if not os.path.isfile(cropland_poly_dissolved):
# Read in GHSL raster
with rasterio.open(cropland) as src:
raster_data = src.read(1) # Selects the 1st band in input file
# Get the transformation matrix to convert pixel coordinates to geographic coordinates
transform = src.transform
raster_crs = src.crs
# Filter out the target class values during shape generation
target_classes = [1]
vector_features = (shape(geom).buffer(0) for geom, val in shapes(raster_data, transform=transform) if val in target_classes)
# Create a GeoDataFrame directly from the shapes iterator
gdf_cropland = gpd.GeoDataFrame({'geometry': vector_features}, crs=raster_crs)
gdf_cropland['geometry'] = gdf_cropland['geometry'].apply(lambda geom: shape(geom).buffer(0)) # Fix the geometries in GeoDataFrame (resolves self-intersections, overlapping polygons, etc.)
print('DynamicWorld raster vectorised.\n')
# Dissolve geometries into a single feature
gdf_cropland['dissolve_id'] = 1 # Create a new column with a constant value (ensures all dissolved into a single feature)
dissolved_gdf_cropland = gdf_cropland.dissolve(by='dissolve_id', as_index=False)
dissolved_gdf_cropland.drop(columns='dissolve_id', inplace=True) # Remove the 'dissolve_id' column (optional)
print('DynamicWorld vector file dissolved into single feature.\n')
dissolved_gdf_cropland.to_feather(cropland_poly_dissolved)
print(f'DynamicWorld vector file exported to {sfmt}.\n')
print('DynamicWorld processing complete.')
timestamp(time_cropland)
# dissolved_gdf_cropland.plot()
# plt.show()
# ==================================================================================================================
# 4. WorldPop
time_worldpop = time.time()
# ===========
# 4.1 clip the boundaries of worldpop to state FIRST, and then generate point dataset
time_41s = time.time()
if not os.path.isfile(pop_tif_clipped):
# Read in WorldPop raster to be clipped
with rasterio.open(pop_tif) as src:
raster_meta = src.meta
raster_crs = src.crs
# Clip the raster using 'mask' function
clipped_raster, out_transform = mask(dataset=src
, shapes=districts_shp.geometry
, crop=True
, nodata=0 # sets the value for pixels outside the vector boundaries
, all_touched=True # decides whether to consider all pixels that touch the vector features
)
# Update the metadata of clipped file
clipped_meta = raster_meta.copy()
clipped_meta.update({
'height': clipped_raster.shape[1],
'width': clipped_raster.shape[2],
'transform': out_transform,
'crs': raster_crs
})
# Export clipped raster to .tif
with rasterio.open(pop_tif_clipped, 'w', **clipped_meta) as dst:
dst.write(clipped_raster)
print('WorldPop raster clipped to state boundaries and exported as .tif.\n')
timestamp(time_41s)
# ===========
# 4.2 Vectorise the WorldPop raster layer into points
if not os.path.isfile(pop_points):
time_42s = time.time()
with rasterio.open(pop_tif_clipped) as src:
raster_data = src.read(1) # Read the raster data and mask out any NoData values
transform = src.transform # Get the transformation matrix to convert pixel coordinates to geographic coordinates
raster_crs = src.crs
num_rows, num_cols = raster_data.shape # Get the number of rows and columns in the raster
x_coords, y_coords = np.meshgrid(np.arange(0, num_cols), # Create a regular grid of points based on the raster's extent and resolution
np.arange(0, num_rows))
points = np.c_[x_coords.ravel(), y_coords.ravel()]
# Transform the grid of points from pixel coordinates to geographic coordinates
lon, lat = rasterio.transform.xy(transform, points[:, 1], points[:, 0])
# Get the raster values at each point location
raster_values = raster_data.ravel()
# Create a GeoDataFrame with the points and set the CRS to match the raster
geometry = [Point(lon[i], lat[i]) for i in range(len(lon))]
gdf_pop = gpd.GeoDataFrame({'geometry': geometry, 'raster_value': raster_values}, crs=raster_crs)
print('Worldpop raster converted into points')
gdf_pop.to_feather(pop_points)
print(f'WorldPop points exported as {sfmt}.\n')
timestamp(time_42s)
print('WorldPop processing complete.')
timestamp(time_worldpop)
# gdf_pop.plot(column='raster_value')
# plt.show()
# ==================================================================================================================
# EXTRA. CALCULATE CROPLAND AND RURAL AREA BY DISTRICT
# Input files:
# 1. vector (feather) of cropland area, for state
# 2. vector (feather) of rural area, for state
# 3. district boundaries, for state (already read in above)
if not os.path.isfile(cropland_area_path):
cropland_poly = gpd.read_feather(cropland_poly_dissolved)
# Set to projected crs (for area/distance calculations)
# Selection: EPSG:24378 (Kalinapur 1975 / India Zone I)
# NOTE: Decision to comment out the below transformation to save processing time
# Compared the difference between results in terms of % of total area; difference is minuscule and does not affect interpretation.
# cropland_poly.to_crs(crs = 'EPSG:24378', inplace=True)
# rural_poly.to_crs(crs = cropland_poly.crs, inplace=True)
# districts_shp.to_crs(crs = cropland_poly.crs, inplace=True)
# Calculate district area
districts_shp['district_area'] = districts_shp['geometry'].area
# Intersect cropland with district boundaries
cropland_by_district = gpd.overlay(districts_shp, cropland_poly, how="intersection")
# Calculate the area of cropland within each district
cropland_by_district['cropland_area'] = cropland_by_district['geometry'].area
cropland_by_district['crop_area_pc'] = cropland_by_district['cropland_area'] / cropland_by_district['district_area'] * 100
# Export files
df_cropland_area = pd.DataFrame(cropland_by_district.drop(columns = ['geometry', 'd_name', 'pc11_s_id']))
df_cropland_area.to_csv(cropland_area_path, index=False)
if not os.path.isfile(rural_area_path):
rural_poly = gpd.read_feather(ghsl_poly_dissolved)
rural_by_district = districts_shp.overlay(rural_poly, how="intersection")
rural_by_district['rural_area'] = rural_by_district['geometry'].area
rural_by_district['rural_area_pc'] = rural_by_district['rural_area'] / rural_by_district['district_area'] * 100
df_rural_area = pd.DataFrame(rural_by_district.drop(columns = ['geometry', 'd_name', 'pc11_s_id']))
df_rural_area.to_csv(rural_area_path, index=False)
# ===============
print('\nScript complete.\n')
timestamp(start_time)