Skip to content

Commit

Permalink
Implement restriction selectivity estimators for <@(spoint, scircle)
Browse files Browse the repository at this point in the history
This implements restriction selectivity estimation for the <@ @> !<@ !@>
family of operators on spoint and scircle. The selectivity is estimated
to be (area of sphere circle) / (4 pi).

Queries like `select * from sky where sky.star <@ scircle(const, radius)`
will be able to properly estimate if using an index is appropriate
depending on the size of radius.

Secondly, a function spoint_dwithin(p1 spoint, p2 spoint, radius float8)
is added that effectively returns `p1 <-> p2 <= radius`. But other than
this two-operator expression, it has GIST index support so the optimizer
can rewrite it to either `p1 <@ scircle(p2, radius)` or `p2 <@
scircle(p1, radius)`, i.e. it is symmetric in the first two arguments.

This allows efficient matching queries without the user having to encode
the join ordering in the query.

On PostgreSQL 10/11, the spoint_dwithin function is created, but without
the GIST support since that only appeared in PG12.

The file expected/selectivity_1.out is used on PG10/11; it has <@
flipped around to @> in some plans.
  • Loading branch information
df7cb committed Oct 19, 2023
1 parent f7ab4c4 commit 65217de
Show file tree
Hide file tree
Showing 18 changed files with 1,132 additions and 10 deletions.
24 changes: 17 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ SRC_DIR = $(shell basename $(shell pwd))

MODULE_big = pg_sphere
OBJS = src/sscan.o src/sparse.o src/sbuffer.o src/vector3d.o src/point.o \
src/euler.o src/circle.o src/line.o src/ellipse.o src/polygon.o \
src/path.o src/box.o src/output.o src/gq_cache.o src/gist.o \
src/euler.o src/circle.o src/circle_sel.o src/line.o src/ellipse.o src/polygon.o \
src/path.o src/box.o src/output.o src/gq_cache.o src/gist.o src/gist_support.o \
src/key.o src/gnomo.o src/epochprop.o src/brin.o

ifneq ($(USE_HEALPIX),0)
Expand All @@ -35,11 +35,11 @@ DATA_built = $(RELEASE_SQL) \
DOCS = README.pg_sphere COPYRIGHT.pg_sphere
REGRESS = init tables points euler circle line ellipse poly path box index \
contains_ops contains_ops_compat bounding_box_gist gnomo epochprop \
contains overlaps spoint_brin sbox_brin
contains overlaps spoint_brin sbox_brin selectivity

TESTS = init_test tables points euler circle line ellipse poly path box \
index contains_ops contains_ops_compat bounding_box_gist gnomo \
epochprop contains overlaps spoint_brin sbox_brin
epochprop contains overlaps spoint_brin sbox_brin selectivity

PG_CFLAGS += -DPGSPHERE_VERSION=$(PGSPHERE_VERSION)
PG_CPPFLAGS += -DPGSPHERE_VERSION=$(PGSPHERE_VERSION)
Expand All @@ -58,7 +58,7 @@ CRUSH_TESTS = init_extended circle_extended
PGS_SQL = pgs_types.sql pgs_point.sql pgs_euler.sql pgs_circle.sql \
pgs_line.sql pgs_ellipse.sql pgs_polygon.sql pgs_path.sql \
pgs_box.sql pgs_contains_ops.sql pgs_contains_ops_compat.sql \
pgs_gist.sql gnomo.sql pgs_brin.sql
pgs_gist.sql gnomo.sql pgs_brin.sql pgs_circle_sel.sql

ifneq ($(USE_HEALPIX),0)
REGRESS += healpix moc moc1 moc100 mocautocast
Expand Down Expand Up @@ -102,10 +102,17 @@ healpix_bare/healpix_bare.o : healpix_bare/healpix_bare.c
$(COMPILE.c) -Wno-declaration-after-statement -o $@ $^

pg_version := $(word 2,$(shell $(PG_CONFIG) --version))
has_support_functions = $(if $(filter-out 9.% 10.% 11.%,$(pg_version)),y,n)

crushtest: REGRESS += $(CRUSH_TESTS)
crushtest: installcheck

ifeq ($(has_support_functions),y)
PGS_SQL += pgs_gist_support.sql
REGRESS += gist_support
TESTS += gist_support
endif

test: pg_sphere.test.sql
$(pg_regress_installcheck) --temp-instance=tmp_check $(REGRESS_OPTS) $(TESTS)

Expand Down Expand Up @@ -180,8 +187,11 @@ pg_sphere--1.2.3--1.3.0.sql: pgs_brin.sql.in
pg_sphere--1.3.0--1.3.1.sql:
cat upgrade_scripts/$@.in > $@

pg_sphere--1.3.1--1.3.2.sql:
cat upgrade_scripts/$@.in > $@
ifeq ($(has_support_functions),y)
pg_sphere--1.3.1--1.3.2.sql: pgs_gist_support.sql.in
endif
pg_sphere--1.3.1--1.3.2.sql: pgs_circle_sel.sql.in
cat upgrade_scripts/$@.in $^ > $@

# end of local stuff

Expand Down
35 changes: 35 additions & 0 deletions doc/functions.sgm
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,41 @@
</example>
</sect2>

<sect2 id="func.spoint_dwithin">
<title>
Point-within-distance function
</title>
<para>
The function
</para>
<funcsynopsis>
<funcprototype>
<funcdef><function>spoint_dwithin</function></funcdef>
<paramdef>spoint <parameter>p1</parameter></paramdef>
<paramdef>spoint <parameter>p2</parameter></paramdef>
<paramdef>float8 <parameter>radius</parameter></paramdef>
</funcprototype>
</funcsynopsis>
<para>
returns a boolean value that signifies whether the points
<parameter>p1</parameter> and <parameter>p2</parameter>
lie within distance <parameter>radius</parameter> (in radians) of each other, i.e.
it computes the boolean expression <literal>p1 &lt;-> p2 &lt;= radius</literal>.
On PostgreSQL 12 and later, the function has <literal>GiST</literal>
support and the PostgreSQL optimizer will transform it to either
<literal>p1 &lt;@ scircle(p2, radius)</literal> or
<literal>p2 &lt;@ scircle(p1, radius)</literal> where appropriate.
</para>
<example>
<title>
Efficiently join two tables of points with some fuzziness permitted
</title>
<programlisting>
<![CDATA[sql> SELECT * FROM stars1 JOIN stars2 WHERE spoint_dwithin(stars1.s, stars2.s, 1e-5);]]>
</programlisting>
</example>
</sect2>

</sect1>

<sect1 id="funcs.strans">
Expand Down
155 changes: 155 additions & 0 deletions expected/gist_support.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
-- spoint_dwithin function selectivity
set jit = off; -- suppress extra planning output
select explain('select * from spoint10k where spoint_dwithin(star, spoint(1,1), 1)');
explain
-----------------------------------------------------------------------------------------------
Bitmap Heap Scan on spoint10k (rows=2298 width=16) (actual rows=3009 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '1'::double precision)
Rows Removed by Filter: 1560
Heap Blocks: exact=55
-> Bitmap Index Scan on spoint10k_star_idx (rows=2298 width=0) (actual rows=4569 loops=1)
Index Cond: (star <@ '<(1 , 1) , 1>'::scircle)
(6 rows)

select explain('select * from spoint10k where spoint_dwithin(star, spoint(1,1), .1)');
explain
-------------------------------------------------------------------------------------------
Bitmap Heap Scan on spoint10k (rows=25 width=16) (actual rows=29 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.1'::double precision)
Rows Removed by Filter: 19
Heap Blocks: exact=32
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
(6 rows)

select explain('select * from spoint10k where spoint_dwithin(star, spoint(1,1), .01)');
explain
---------------------------------------------------------------------------------------------
Index Scan using spoint10k_star_idx on spoint10k (rows=1 width=16) (actual rows=1 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.01>'::scircle)
(2 rows)

select explain('select * from spoint10k where spoint_dwithin(spoint(1,1), star, 1)');
explain
-----------------------------------------------------------------------------------------------
Bitmap Heap Scan on spoint10k (rows=2298 width=16) (actual rows=3009 loops=1)
Filter: spoint_dwithin('(1 , 1)'::spoint, star, '1'::double precision)
Rows Removed by Filter: 1560
Heap Blocks: exact=55
-> Bitmap Index Scan on spoint10k_star_idx (rows=2298 width=0) (actual rows=4569 loops=1)
Index Cond: (star <@ '<(1 , 1) , 1>'::scircle)
(6 rows)

select explain('select * from spoint10k where spoint_dwithin(spoint(1,1), star, .1)');
explain
-------------------------------------------------------------------------------------------
Bitmap Heap Scan on spoint10k (rows=25 width=16) (actual rows=29 loops=1)
Filter: spoint_dwithin('(1 , 1)'::spoint, star, '0.1'::double precision)
Rows Removed by Filter: 19
Heap Blocks: exact=32
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
(6 rows)

select explain('select * from spoint10k where spoint_dwithin(spoint(1,1), star, .01)');
explain
---------------------------------------------------------------------------------------------
Index Scan using spoint10k_star_idx on spoint10k (rows=1 width=16) (actual rows=1 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.01>'::scircle)
(2 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, 1)', do_analyze := 'false');
explain
---------------------------------------------------------------------------------------
Nested Loop (rows=22984885 width=32)
-> Seq Scan on spoint10k a (rows=10000 width=16)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=2298 width=16)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '1'::double precision))
(4 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .1)');
explain
-----------------------------------------------------------------------------------------------------------
Nested Loop (rows=249792 width=32) (actual rows=505342 loops=1)
-> Seq Scan on spoint10k a (rows=10000 width=16) (actual rows=10000 loops=1)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=25 width=16) (actual rows=51 loops=10000)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.1'::double precision))
Rows Removed by Index Recheck: 31
(5 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)');
explain
---------------------------------------------------------------------------------------------------------
Nested Loop (rows=2500 width=32) (actual rows=17614 loops=1)
-> Seq Scan on spoint10k a (rows=10000 width=16) (actual rows=10000 loops=1)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=2 loops=10000)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision))
Rows Removed by Index Recheck: 1
(5 rows)

-- spoint_dwithin is symmetric in the first two arguments
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)
where spoint_dwithin(a.star, spoint(1,1), .1)');
explain
------------------------------------------------------------------------------------------------------
Nested Loop (rows=6 width=32) (actual rows=33 loops=1)
-> Bitmap Heap Scan on spoint10k a (rows=25 width=16) (actual rows=29 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.1'::double precision)
Rows Removed by Filter: 19
Heap Blocks: exact=32
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=1 loops=29)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision))
Rows Removed by Index Recheck: 0
(10 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(b.star, a.star, .01)
where spoint_dwithin(a.star, spoint(1,1), .1)');
explain
------------------------------------------------------------------------------------------------------
Nested Loop (rows=6 width=32) (actual rows=33 loops=1)
-> Bitmap Heap Scan on spoint10k a (rows=25 width=16) (actual rows=29 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.1'::double precision)
Rows Removed by Filter: 19
Heap Blocks: exact=32
-> Bitmap Index Scan on spoint10k_star_idx (rows=25 width=0) (actual rows=48 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.1>'::scircle)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=1 loops=29)
Index Cond: (star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision))
Rows Removed by Index Recheck: 0
(10 rows)

-- both sides indexable, check if the planner figures out the better choice
select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)
where spoint_dwithin(a.star, spoint(1,1), .1) and spoint_dwithin(b.star, spoint(1,1), .05)');
explain
-------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (rows=1 width=32) (actual rows=16 loops=1)
-> Bitmap Heap Scan on spoint10k b (rows=6 width=16) (actual rows=12 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.05'::double precision)
Rows Removed by Filter: 4
Heap Blocks: exact=14
-> Bitmap Index Scan on spoint10k_star_idx (rows=6 width=0) (actual rows=16 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.05>'::scircle)
-> Index Scan using spoint10k_star_idx on spoint10k a (rows=1 width=16) (actual rows=1 loops=12)
Index Cond: ((star OPERATOR(public.<@) scircle(b.star, '0.01'::double precision)) AND (star <@ '<(1 , 1) , 0.1>'::scircle))
Rows Removed by Index Recheck: 0
(10 rows)

select explain('select * from spoint10k a join spoint10k b on spoint_dwithin(a.star, b.star, .01)
where spoint_dwithin(a.star, spoint(1,1), .05) and spoint_dwithin(b.star, spoint(1,1), .1)');
explain
-------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (rows=1 width=32) (actual rows=16 loops=1)
-> Bitmap Heap Scan on spoint10k a (rows=6 width=16) (actual rows=12 loops=1)
Filter: spoint_dwithin(star, '(1 , 1)'::spoint, '0.05'::double precision)
Rows Removed by Filter: 4
Heap Blocks: exact=14
-> Bitmap Index Scan on spoint10k_star_idx (rows=6 width=0) (actual rows=16 loops=1)
Index Cond: (star <@ '<(1 , 1) , 0.05>'::scircle)
-> Index Scan using spoint10k_star_idx on spoint10k b (rows=1 width=16) (actual rows=1 loops=12)
Index Cond: ((star OPERATOR(public.<@) scircle(a.star, '0.01'::double precision)) AND (star <@ '<(1 , 1) , 0.1>'::scircle))
Rows Removed by Index Recheck: 0
(10 rows)

2 changes: 1 addition & 1 deletion expected/index.out
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ SELECT count(*) FROM spheretmp4 WHERE l && scircle '<(1,1),0.3>';

-- create idx
CREATE TABLE spheretmp1b AS TABLE spheretmp1;
ANALYZE spheretmp1;
CREATE INDEX aaaidx ON spheretmp1 USING gist ( p );
CREATE INDEX spoint3_idx ON spheretmp1b USING gist (p spoint3);
CREATE INDEX bbbidx ON spheretmp2 USING gist ( c );
Expand Down Expand Up @@ -165,7 +166,6 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM spheretmp1b WHERE p = spoint '(3.09 , 1
4
(1 row)

SET enable_bitmapscan = ON;
SET enable_indexonlyscan = OFF;
EXPLAIN (COSTS OFF) SELECT count(*) FROM spheretmp1b WHERE p <@ scircle '<(1,1),0.3>';
QUERY PLAN
Expand Down
16 changes: 16 additions & 0 deletions expected/points.out
Original file line number Diff line number Diff line change
Expand Up @@ -666,3 +666,19 @@ SELECT '( 0h 2m 30s , -90d 0m 0s)'::spoint<->'( 12h 2m 30s , -90d 0m 0s)'::spoin
0
(1 row)

-- spoint_dwithin function ----------
SELECT a, b, radius, a <-> b AS "<->", spoint_dwithin(a, b, radius)
FROM (VALUES
('(0, 0)'::spoint, '(0, 0)'::spoint, 0),
('(0, 0)', '(0, 1)', 1),
('(0, 0)', '(0.1, 0.1)', 0.14),
('(0, 0)', '(0.1, 0.1)', 0.15)
) sub (a, b, radius);
a | b | radius | <-> | spoint_dwithin
---------+-------------+--------+-----------------+----------------
(0 , 0) | (0 , 0) | 0 | 0 | t
(0 , 0) | (0 , 1) | 1 | 1 | t
(0 , 0) | (0.1 , 0.1) | 0.14 | 0.1413032986961 | f
(0 , 0) | (0.1 , 0.1) | 0.15 | 0.1413032986961 | t
(4 rows)

Loading

0 comments on commit 65217de

Please sign in to comment.