Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Selectivity estimators for <@(spoint, scircle) and spoint_dwithin(spoint, spoint, float8) #80

Merged
merged 1 commit into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ SRC_DIR = $(shell basename $(shell pwd))

MODULE_big = pg_sphere
OBJS = src/sscan.o src/sparse.o src/sbuffer.o src/vector3d.o src/point.o \
src/euler.o src/circle.o src/line.o src/ellipse.o src/polygon.o \
src/path.o src/box.o src/output.o src/gq_cache.o src/gist.o \
src/euler.o src/circle.o src/circle_sel.o src/line.o src/ellipse.o src/polygon.o \
src/path.o src/box.o src/output.o src/gq_cache.o src/gist.o src/gist_support.o \
src/key.o src/gnomo.o src/epochprop.o src/brin.o

ifneq ($(USE_HEALPIX),0)
Expand All @@ -35,11 +35,11 @@ DATA_built = $(RELEASE_SQL) \
DOCS = README.pg_sphere COPYRIGHT.pg_sphere
REGRESS = init tables points euler circle line ellipse poly path box index \
contains_ops contains_ops_compat bounding_box_gist gnomo epochprop \
contains overlaps spoint_brin sbox_brin
contains overlaps spoint_brin sbox_brin selectivity

TESTS = init_test tables points euler circle line ellipse poly path box \
index contains_ops contains_ops_compat bounding_box_gist gnomo \
epochprop contains overlaps spoint_brin sbox_brin
epochprop contains overlaps spoint_brin sbox_brin selectivity

PG_CFLAGS += -DPGSPHERE_VERSION=$(PGSPHERE_VERSION)
PG_CPPFLAGS += -DPGSPHERE_VERSION=$(PGSPHERE_VERSION)
Expand All @@ -58,7 +58,7 @@ CRUSH_TESTS = init_extended circle_extended
PGS_SQL = pgs_types.sql pgs_point.sql pgs_euler.sql pgs_circle.sql \
pgs_line.sql pgs_ellipse.sql pgs_polygon.sql pgs_path.sql \
pgs_box.sql pgs_contains_ops.sql pgs_contains_ops_compat.sql \
pgs_gist.sql gnomo.sql pgs_brin.sql
pgs_gist.sql gnomo.sql pgs_brin.sql pgs_circle_sel.sql

ifneq ($(USE_HEALPIX),0)
REGRESS += healpix moc moc1 moc100 mocautocast
Expand Down Expand Up @@ -102,10 +102,17 @@ healpix_bare/healpix_bare.o : healpix_bare/healpix_bare.c
$(COMPILE.c) -Wno-declaration-after-statement -o $@ $^

pg_version := $(word 2,$(shell $(PG_CONFIG) --version))
has_support_functions = $(if $(filter-out 9.% 10.% 11.%,$(pg_version)),y,n)

crushtest: REGRESS += $(CRUSH_TESTS)
crushtest: installcheck

ifeq ($(has_support_functions),y)
PGS_SQL += pgs_gist_support.sql
REGRESS += gist_support
TESTS += gist_support
endif

test: pg_sphere.test.sql
$(pg_regress_installcheck) --temp-instance=tmp_check $(REGRESS_OPTS) $(TESTS)

Expand Down Expand Up @@ -180,8 +187,11 @@ pg_sphere--1.2.3--1.3.0.sql: pgs_brin.sql.in
pg_sphere--1.3.0--1.3.1.sql:
cat upgrade_scripts/$@.in > $@

pg_sphere--1.3.1--1.3.2.sql:
cat upgrade_scripts/$@.in > $@
ifeq ($(has_support_functions),y)
pg_sphere--1.3.1--1.3.2.sql: pgs_gist_support.sql.in
endif
pg_sphere--1.3.1--1.3.2.sql: pgs_circle_sel.sql.in
cat upgrade_scripts/$@.in $^ > $@
df7cb marked this conversation as resolved.
Show resolved Hide resolved

# end of local stuff

Expand Down
35 changes: 35 additions & 0 deletions doc/functions.sgm
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,41 @@
</example>
</sect2>

<sect2 id="func.spoint_dwithin">
<title>
Point-within-distance function
</title>
<para>
The function
</para>
<funcsynopsis>
<funcprototype>
<funcdef><function>spoint_dwithin</function></funcdef>
<paramdef>spoint <parameter>p1</parameter></paramdef>
<paramdef>spoint <parameter>p2</parameter></paramdef>
<paramdef>float8 <parameter>radius</parameter></paramdef>
</funcprototype>
</funcsynopsis>
<para>
returns a boolean value that signifies whether the points
<parameter>p1</parameter> and <parameter>p2</parameter>
lie within distance <parameter>radius</parameter> (in radians) of each other, i.e.
it computes the boolean expression <literal>p1 &lt;-> p2 &lt;= radius</literal>.
On PostgreSQL 12 and later, the function has <literal>GiST</literal>
support and the PostgreSQL optimizer will transform it to either
<literal>p1 &lt;@ scircle(p2, radius)</literal> or
<literal>p2 &lt;@ scircle(p1, radius)</literal> where appropriate.
</para>
<example>
<title>
Efficiently join two tables of points with some fuzziness permitted
</title>
<programlisting>
<![CDATA[sql> SELECT * FROM stars1 JOIN stars2 WHERE spoint_dwithin(stars1.s, stars2.s, 1e-5);]]>
</programlisting>
</example>
</sect2>

</sect1>

<sect1 id="funcs.strans">
Expand Down
155 changes: 155 additions & 0 deletions expected/gist_support.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
-- spoint_dwithin function selectivity
set jit = off; -- suppress extra planning output
select explain('select * from spoint10k where spoint_dwithin(star, spoint(1,1), 1)');
explain
-----------------------------------------------------------------------------------------------
Bitmap Heap Scan on spoint10k (rows=2298 width=16) (actual rows=3009 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '1'::double precision)
Rows Removed by Filter: 1560
Heap Blocks: exact=55
-> Bitmap Index Scan on spoint10k_star_idx (rows=2298 width=0) (actual rows=4569 loops=1)
Index Cond: (star <@ '<(1 , 1) , 1>'::scircle)
(6 rows)

select explain('select * from spoint10k where spoint_dwithin(star, spoint(1,1), .1)');
explain
-------------------------------------------------------------------------------------------
Bitmap Heap Scan on spoint10k (rows=25 width=16) (actual rows=29 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.1'::double precision)
Rows Removed by Filter: 19
Heap Blocks: exact=32
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
(6 rows)

select explain('select * from spoint10k where spoint_dwithin(star, spoint(1,1), .01)');
explain
---------------------------------------------------------------------------------------------
Index Scan using spoint10k_star_idx on spoint10k (rows=1 width=16) (actual rows=1 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.01>'::scircle)
(2 rows)

select explain('select * from spoint10k where spoint_dwithin(spoint(1,1), star, 1)');
explain
-----------------------------------------------------------------------------------------------
Bitmap Heap Scan on spoint10k (rows=2298 width=16) (actual rows=3009 loops=1)
Filter: spoint_dwithin('(1 , 1)'::spoint, star, '1'::double precision)
Rows Removed by Filter: 1560
Heap Blocks: exact=55
-> Bitmap Index Scan on spoint10k_star_idx (rows=2298 width=0) (actual rows=4569 loops=1)
Index Cond: (star <@ '<(1 , 1) , 1>'::scircle)
(6 rows)

select explain('select * from spoint10k where spoint_dwithin(spoint(1,1), star, .1)');
explain
-------------------------------------------------------------------------------------------
Bitmap Heap Scan on spoint10k (rows=25 width=16) (actual rows=29 loops=1)
Filter: spoint_dwithin('(1 , 1)'::spoint, star, '0.1'::double precision)
Rows Removed by Filter: 19
Heap Blocks: exact=32
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
(6 rows)

select explain('select * from spoint10k where spoint_dwithin(spoint(1,1), star, .01)');
explain
---------------------------------------------------------------------------------------------
Index Scan using spoint10k_star_idx on spoint10k (rows=1 width=16) (actual rows=1 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.01>'::scircle)
(2 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, 1)', do_analyze := 'false');
explain
---------------------------------------------------------------------------------------
Nested Loop (rows=22984885 width=32)
-> Seq Scan on spoint10k a (rows=10000 width=16)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=2298 width=16)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '1'::double precision))
(4 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .1)');
explain
-----------------------------------------------------------------------------------------------------------
Nested Loop (rows=249792 width=32) (actual rows=505342 loops=1)
-> Seq Scan on spoint10k a (rows=10000 width=16) (actual rows=10000 loops=1)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=25 width=16) (actual rows=51 loops=10000)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.1'::double precision))
Rows Removed by Index Recheck: 31
(5 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)');
explain
---------------------------------------------------------------------------------------------------------
Nested Loop (rows=2500 width=32) (actual rows=17614 loops=1)
-> Seq Scan on spoint10k a (rows=10000 width=16) (actual rows=10000 loops=1)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=2 loops=10000)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision))
Rows Removed by Index Recheck: 1
(5 rows)

-- spoint_dwithin is symmetric in the first two arguments
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)
where spoint_dwithin(a.star, spoint(1,1), .1)');
explain
------------------------------------------------------------------------------------------------------
Nested Loop (rows=6 width=32) (actual rows=33 loops=1)
-> Bitmap Heap Scan on spoint10k a (rows=25 width=16) (actual rows=29 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.1'::double precision)
Rows Removed by Filter: 19
Heap Blocks: exact=32
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=1 loops=29)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision))
Rows Removed by Index Recheck: 0
(10 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(b.star, a.star, .01)
where spoint_dwithin(a.star, spoint(1,1), .1)');
explain
------------------------------------------------------------------------------------------------------
Nested Loop (rows=6 width=32) (actual rows=33 loops=1)
-> Bitmap Heap Scan on spoint10k a (rows=25 width=16) (actual rows=29 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.1'::double precision)
Rows Removed by Filter: 19
Heap Blocks: exact=32
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=1 loops=29)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision))
Rows Removed by Index Recheck: 0
(10 rows)

-- both sides indexable, check if the planner figures out the better choice
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)
where spoint_dwithin(a.star, spoint(1,1), .1) and spoint_dwithin(b.star, spoint(1,1), .05)');
explain
-------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (rows=1 width=32) (actual rows=16 loops=1)
-> Bitmap Heap Scan on spoint10k b (rows=6 width=16) (actual rows=12 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.05'::double precision)
Rows Removed by Filter: 4
Heap Blocks: exact=14
-> Bitmap Index Scan on spoint10k_star_idx (rows=6 width=0) (actual rows=16 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.05>'::scircle)
-> Index Scan using spoint10k_star_idx on spoint10k a (rows=1 width=16) (actual rows=1 loops=12)
Index Cond: ((star OPERATOR(public.<@) scircle(b.star, '0.01'::double precision)) AND (star <@ '<(1 , 1) , 0.1>'::scircle))
Rows Removed by Index Recheck: 0
(10 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)
where spoint_dwithin(a.star, spoint(1,1), .05) and spoint_dwithin(b.star, spoint(1,1), .1)');
explain
-------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (rows=1 width=32) (actual rows=16 loops=1)
-> Bitmap Heap Scan on spoint10k a (rows=6 width=16) (actual rows=12 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.05'::double precision)
Rows Removed by Filter: 4
Heap Blocks: exact=14
-> Bitmap Index Scan on spoint10k_star_idx (rows=6 width=0) (actual rows=16 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.05>'::scircle)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=1 loops=12)
Index Cond: ((star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision)) AND (star <@ '<(1 , 1) , 0.1>'::scircle))
Rows Removed by Index Recheck: 0
(10 rows)

2 changes: 1 addition & 1 deletion expected/index.out
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ SELECT count(*) FROM spheretmp4 WHERE l && scircle '<(1,1),0.3>';

-- create idx
CREATE TABLE spheretmp1b AS TABLE spheretmp1;
ANALYZE spheretmp1;
df7cb marked this conversation as resolved.
Show resolved Hide resolved
CREATE INDEX aaaidx ON spheretmp1 USING gist ( p );
CREATE INDEX spoint3_idx ON spheretmp1b USING gist (p spoint3);
CREATE INDEX bbbidx ON spheretmp2 USING gist ( c );
Expand Down Expand Up @@ -165,7 +166,6 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM spheretmp1b WHERE p = spoint '(3.09 , 1
4
(1 row)

SET enable_bitmapscan = ON;
SET enable_indexonlyscan = OFF;
EXPLAIN (COSTS OFF) SELECT count(*) FROM spheretmp1b WHERE p <@ scircle '<(1,1),0.3>';
QUERY PLAN
Expand Down
16 changes: 16 additions & 0 deletions expected/points.out
Original file line number Diff line number Diff line change
Expand Up @@ -666,3 +666,19 @@ SELECT '( 0h 2m 30s , -90d 0m 0s)'::spoint<->'( 12h 2m 30s , -90d 0m 0s)'::spoin
0
(1 row)

-- spoint_dwithin function ----------
SELECT a, b, radius, a <-> b AS "<->", spoint_dwithin(a, b, radius)
FROM (VALUES
('(0, 0)'::spoint, '(0, 0)'::spoint, 0),
('(0, 0)', '(0, 1)', 1),
('(0, 0)', '(0.1, 0.1)', 0.14),
('(0, 0)', '(0.1, 0.1)', 0.15)
) sub (a, b, radius);
a | b | radius | <-> | spoint_dwithin
---------+-------------+--------+-----------------+----------------
(0 , 0) | (0 , 0) | 0 | 0 | t
(0 , 0) | (0 , 1) | 1 | 1 | t
(0 , 0) | (0.1 , 0.1) | 0.14 | 0.1413032986961 | f
(0 , 0) | (0.1 , 0.1) | 0.15 | 0.1413032986961 | t
(4 rows)

Loading