forked from geopandas/geopandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsindex.py
102 lines (83 loc) · 3.37 KB
/
sindex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from shapely.geometry import Point
from geopandas.tests.util import _NATURALEARTH_LOWRES
from geopandas.tests.util import _NATURALEARTH_CITIES
from geopandas import read_file, GeoSeries
# Derive list of valid query predicates based on underlying index backend;
# we have to create a non-empty instance of the index to get these
index = GeoSeries([Point(0, 0)]).sindex
predicates = sorted(p for p in index.valid_query_predicates if p is not None)
geom_types = ("mixed", "points", "polygons")
def generate_test_df():
world = read_file(_NATURALEARTH_LOWRES)
capitals = read_file(_NATURALEARTH_CITIES)
countries = world.to_crs("epsg:3395")[["geometry"]]
capitals = capitals.to_crs("epsg:3395")[["geometry"]]
mixed = capitals.append(countries) # get a mix of geometries
points = capitals
polygons = countries
# filter out invalid geometries
data = {
"mixed": mixed[mixed.is_valid],
"points": points[points.is_valid],
"polygons": polygons[polygons.is_valid],
}
# ensure index is pre-generated
for data_type in data.keys():
data[data_type].sindex.query(data[data_type].geometry.values[0])
return data
class BenchIntersection:
param_names = ["input_geom_type", "tree_geom_type"]
params = [
geom_types,
geom_types,
]
def setup(self, *args):
self.data = generate_test_df()
# cache bounds so that bound creation is not counted in benchmarks
self.bounds = {
data_type: [g.bounds for g in self.data[data_type].geometry]
for data_type in self.data.keys()
}
def time_intersects(self, input_geom_type, tree_geom_type):
tree = self.data[tree_geom_type].sindex
for bounds in self.bounds[input_geom_type]:
tree.intersection(bounds)
class BenchIndexCreation:
param_names = ["tree_geom_type"]
params = [
geom_types,
]
def setup(self, *args):
self.data = generate_test_df()
def time_index_creation(self, tree_geom_type):
"""Time creation of spatial index.
Note: requires running a single query to ensure that
lazy-building indexes are actually built.
"""
# Note: the GeoDataFram._sindex_generated attribute will
# be removed by GH#1444 but is kept here (in the benchmarks
# so that we can compare pre GH#1444 to post GH#1444 if needed
self.data[tree_geom_type]._sindex_generated = None
self.data[tree_geom_type].geometry.values._sindex = None
tree = self.data[tree_geom_type].sindex
# also do a single query to ensure the index is actually
# generated and used
tree.query(self.data[tree_geom_type].geometry.values[0])
class BenchQuery:
param_names = ["predicate", "input_geom_type", "tree_geom_type"]
params = [
predicates,
geom_types,
geom_types,
]
def setup(self, *args):
self.data = generate_test_df()
def time_query_bulk(self, predicate, input_geom_type, tree_geom_type):
self.data[tree_geom_type].sindex.query(
self.data[input_geom_type].geometry.values,
predicate=predicate,
)
def time_query(self, predicate, input_geom_type, tree_geom_type):
tree = self.data[tree_geom_type].sindex
for geom in self.data[input_geom_type].geometry.values:
tree.query(geom, predicate=predicate)