Skip to content

Commit

Permalink
Allow optimized searches for tiled requests. (#39)
Browse files Browse the repository at this point in the history
* xyz tile search

* add functions for bulk create_items and upsert_items

* update tests, create migrations

* add exitwhenfull flag

Co-authored-by: vincentsarago <[email protected]>
  • Loading branch information
bitner and vincentsarago authored Aug 24, 2021
1 parent 1c0694e commit 0a1a655
Show file tree
Hide file tree
Showing 16 changed files with 2,289 additions and 10 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# Changelog

## [v0.3.4]

### Added

- add `geometrysearch`, `geojsonsearch` and `xyzsearch` for optimized searches for tiled requests ([#39](https://github.com/stac-utils/pgstac/pull/39))

## [v0.3.3]

### Fixed

- Fixed CQL term to be "id", not "ids" ([#46](https://github.com/stac-utils/pgstac/pull/46))
- Make sure featureCollection response has empty features `[]` not `null` ([#46](https://github.com/stac-utils/pgstac/pull/46))
- Fixed bugs for `sortby` and `pagination` ([#46](https://github.com/stac-utils/pgstac/pull/46))
- Make sure pgtap errors get caught in CI ([#46](https://github.com/stac-utils/pgstac/pull/46))

## [v0.3.2]

### Fixed
Expand Down
4 changes: 3 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ services:
build:
context: .
dockerfile: Dockerfile.dev
platform: linux/amd64
depends_on:
- database
volumes:
Expand All @@ -20,6 +21,7 @@ services:
build:
context: .
dockerfile: Dockerfile
platform: linux/amd64
environment:
- POSTGRES_USER=username
- POSTGRES_PASSWORD=password
Expand All @@ -29,7 +31,7 @@ services:
- PGHOST=localhost
- PGDATABASE=postgis
ports:
- "5432:5432"
- "5439:5432"
volumes:
- pgstac-pgdata:/var/lib/postgresql/data
- ./:/opt/src
Expand Down
2 changes: 2 additions & 0 deletions pgstac.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ BEGIN;
\i sql/002_collections.sql
\i sql/003_items.sql
\i sql/004_search.sql
\i sql/005_tileutils.sql
\i sql/006_tilesearch.sql
\i sql/999_version.sql
COMMIT;
2 changes: 1 addition & 1 deletion pypgstac/pypgstac/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""PyPGStac Version."""
__version__ = "0.3.3"
__version__ = "0.3.4"
200 changes: 200 additions & 0 deletions pypgstac/pypgstac/migrations/pgstac.0.3.3-0.3.4.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
-- Resolve unqualified object names against the pgstac schema first, then public,
-- for the remainder of this migration session.
SET search_path TO pgstac, public;

-- Do not validate function bodies at CREATE time, so the functions below can be
-- created regardless of the order in which the objects they reference appear.
SET check_function_bodies TO off;

-- Bulk create: stage every element of a JSON array as one row.
--
--   data  jsonb array; each element is inserted as one row into
--         items_staging.content. A non-array value makes
--         jsonb_array_elements raise an error.
--
-- NOTE(review): what happens to rows after they land in items_staging
-- (e.g. a trigger moving them into items) is defined elsewhere - confirm there.
CREATE OR REPLACE FUNCTION pgstac.create_items(data jsonb)
    RETURNS void
    LANGUAGE sql
    SET search_path TO 'pgstac', 'public'
AS $function$
    INSERT INTO items_staging (content)
    SELECT element.content
    FROM jsonb_array_elements(data) AS element(content);
$function$
;

-- Elapsed time since the current transaction started, as an interval.
-- clock_timestamp() advances during the transaction while
-- transaction_timestamp() stays fixed, so successive calls return growing values.
CREATE OR REPLACE FUNCTION pgstac.ftime()
    RETURNS interval
    LANGUAGE sql
AS $function$
    SELECT age(clock_timestamp(), transaction_timestamp()) AS elapsed;
$function$
;

-- GeoJSON front end for geometrysearch: converts the GeoJSON geometry with
-- st_geomfromgeojson and forwards every other argument unchanged.
--
--   geojson  GeoJSON geometry to search within
--   (all remaining parameters are passed straight through to geometrysearch)
--
-- Returns whatever geometrysearch returns for the converted geometry.
CREATE OR REPLACE FUNCTION pgstac.geojsonsearch(geojson jsonb, queryhash text, fields jsonb DEFAULT NULL::jsonb, _scanlimit integer DEFAULT 10000, _limit integer DEFAULT 100, _timelimit interval DEFAULT '00:00:05'::interval, exitwhenfull boolean DEFAULT true, skipcovered boolean DEFAULT true)
    RETURNS jsonb
    LANGUAGE sql
AS $function$
    SELECT geometrysearch(
        st_geomfromgeojson(geojson),
        queryhash,
        fields,
        _scanlimit,
        _limit,
        _timelimit,
        exitwhenfull,
        skipcovered
    );
$function$
;

-- Run a previously registered search restricted to items intersecting a geometry
-- and return the matches as a GeoJSON FeatureCollection.
--
-- Parameters
--   geom          geometry the items must intersect (st_intersects).
--   queryhash     hash of a row in searches; its _where / orderby drive the query.
--   fields        optional {"include": [...], "exclude": [...]} filter, optionally
--                 wrapped in a top-level "fields" key. "id" is always added to a
--                 non-empty include list.
--   _scanlimit    maximum number of rows to fetch across all partition queries.
--   _limit        maximum number of features to return.
--   _timelimit    stop once ftime() (time since transaction start) exceeds this.
--   exitwhenfull  stop as soon as the union of returned footprints covers geom.
--   skipcovered   drop items that add no new coverage; implies exitwhenfull.
--
-- Returns  jsonb {"type": "FeatureCollection", "features": [...]}.
-- Raises   an exception when no search with the given hash exists.
--
-- Depends on partition_queries, create_cursor, textarr, filter_jsonb and ftime,
-- which are defined elsewhere in the schema.
CREATE OR REPLACE FUNCTION pgstac.geometrysearch(geom geometry, queryhash text, fields jsonb DEFAULT NULL::jsonb, _scanlimit integer DEFAULT 10000, _limit integer DEFAULT 100, _timelimit interval DEFAULT '00:00:05'::interval, exitwhenfull boolean DEFAULT true, skipcovered boolean DEFAULT true)
    RETURNS jsonb
    LANGUAGE plpgsql
AS $function$
DECLARE
    search searches%ROWTYPE;
    curs refcursor;
    _where text;
    query text;
    iter_record items%ROWTYPE;
    out_records jsonb[] := '{}'::jsonb[];
    exit_flag boolean := FALSE;
    counter int := 1;       -- 1-based index of the next feature to be returned
    scancounter int := 1;   -- 1-based index of the next row to be scanned
    remaining_limit int := _scanlimit;
    tilearea float;
    unionedgeom geometry;
    clippedgeom geometry;
    unionedgeom_area float := 0;
    prev_area float := 0;
    excludes text[];
    includes text[];
BEGIN
    -- If skipcovered is true then you will always want to exit when the passed in geometry is full
    IF skipcovered THEN
        exitwhenfull := TRUE;
    END IF;

    SELECT * INTO search FROM searches WHERE hash=queryhash;

    IF NOT FOUND THEN
        RAISE EXCEPTION 'Search with Query Hash % Not Found', queryhash;
    END IF;

    tilearea := st_area(geom);

    -- Coverage cannot be measured against a geometry without area (point, line,
    -- NULL): every clipped footprint has area 0, so skipcovered would discard
    -- every row and the coverage ratio would divide by zero. Fall back to a
    -- plain limit/time bounded search in that case.
    IF COALESCE(tilearea, 0) = 0 THEN
        exitwhenfull := FALSE;
        skipcovered := FALSE;
    END IF;

    _where := format('%s AND st_intersects(geometry, %L::geometry)', search._where, geom);

    IF fields IS NOT NULL THEN
        IF fields ? 'fields' THEN
            fields := fields->'fields';
        END IF;
        IF fields ? 'exclude' THEN
            excludes=textarr(fields->'exclude');
        END IF;
        IF fields ? 'include' THEN
            includes=textarr(fields->'include');
            -- The id must survive any include filter.
            IF array_length(includes, 1)>0 AND NOT 'id' = ANY (includes) THEN
                includes = includes || '{id}';
            END IF;
        END IF;
    END IF;
    RAISE NOTICE 'fields: %, includes: %, excludes: %', fields, includes, excludes;

    FOR query IN SELECT * FROM partition_queries(_where, search.orderby) LOOP
        query := format('%s LIMIT %L', query, remaining_limit);
        RAISE NOTICE '%', query;
        -- create_cursor is expected to hand back an open cursor; FETCH relies on it.
        curs = create_cursor(query);
        LOOP
            FETCH curs INTO iter_record;
            EXIT WHEN NOT FOUND;
            IF exitwhenfull OR skipcovered THEN -- If we are not using exitwhenfull or skipcovered, we do not need to do expensive geometry operations
                clippedgeom := st_intersection(geom, iter_record.geometry);

                IF unionedgeom IS NULL THEN
                    unionedgeom := clippedgeom;
                ELSE
                    unionedgeom := st_union(unionedgeom, clippedgeom);
                END IF;

                unionedgeom_area := st_area(unionedgeom);

                -- Item added no new coverage: do not return it.
                IF skipcovered AND prev_area = unionedgeom_area THEN
                    scancounter := scancounter + 1;
                    -- Skipped rows still consume the time budget.
                    IF ftime() > _timelimit THEN
                        exit_flag := TRUE;
                        EXIT;
                    END IF;
                    CONTINUE;
                END IF;

                prev_area := unionedgeom_area;

                RAISE NOTICE '% % % %', unionedgeom_area/tilearea, counter, scancounter, ftime();
            END IF;

            IF fields IS NOT NULL THEN
                out_records := out_records || filter_jsonb(iter_record.content, includes, excludes);
            ELSE
                out_records := out_records || iter_record.content;
            END IF;
            IF counter >= _limit
                OR scancounter > _scanlimit
                OR ftime() > _timelimit
                OR (exitwhenfull AND unionedgeom_area >= tilearea)
            THEN
                exit_flag := TRUE;
                EXIT;
            END IF;
            counter := counter + 1;
            scancounter := scancounter + 1;

        END LOOP;
        -- Release the portal before moving on; otherwise one cursor per
        -- partition query stays open until the transaction ends.
        CLOSE curs;
        EXIT WHEN exit_flag;
        -- scancounter is 1-based, so (scancounter - 1) rows have been scanned.
        remaining_limit := _scanlimit - (scancounter - 1);
        -- Never build a query with LIMIT <= 0: a negative LIMIT is an error.
        EXIT WHEN remaining_limit < 1;
    END LOOP;

    RETURN jsonb_build_object(
        'type', 'FeatureCollection',
        'features', array_to_json(out_records)::jsonb
    );
END;
$function$
;

-- Bounding box of an XYZ map tile as an SRID 3857 envelope.
--
--   zoom  zoom level; the axis extent is divided into 2 ^ zoom tiles
--   x     tile column, counted from the minimum X edge
--   y     tile row, counted from the maximum Y edge
--
-- 20037508.3427892 is used as the half-extent of the square on both axes.
-- x and y are not range-checked against the zoom level.
CREATE OR REPLACE FUNCTION pgstac.tileenvelope(zoom integer, x integer, y integer)
    RETURNS geometry
    LANGUAGE sql
    IMMUTABLE PARALLEL SAFE
AS $function$
    SELECT st_makeenvelope(
        extent.merc_min + (extent.tile_size * x),
        extent.merc_max - (extent.tile_size * (y + 1)),
        extent.merc_min + (extent.tile_size * (x + 1)),
        extent.merc_max - (extent.tile_size * y),
        3857
    )
    FROM (
        SELECT
            20037508.3427892 as merc_max,
            -20037508.3427892 as merc_min,
            (2 * 20037508.3427892) / (2 ^ zoom) as tile_size
    ) AS extent;
$function$
;

-- Bulk upsert: stage every element of a JSON array as one row.
--
--   data  jsonb array; each element is inserted as one row into
--         items_staging_upsert.content. A non-array value makes
--         jsonb_array_elements raise an error.
--
-- NOTE(review): the upsert itself is presumably performed by whatever consumes
-- items_staging_upsert (defined elsewhere) - confirm there.
CREATE OR REPLACE FUNCTION pgstac.upsert_items(data jsonb)
    RETURNS void
    LANGUAGE sql
    SET search_path TO 'pgstac', 'public'
AS $function$
    INSERT INTO items_staging_upsert (content)
    SELECT element.content
    FROM jsonb_array_elements(data) AS element(content);
$function$
;

-- XYZ tile front end for geometrysearch: builds the tile envelope with
-- tileenvelope(_z, _x, _y), reprojects it to SRID 4326 and forwards every
-- other argument unchanged.
--
--   _x, _y, _z  tile column, row and zoom level
--   (all remaining parameters are passed straight through to geometrysearch)
--
-- Returns whatever geometrysearch returns for the tile geometry.
CREATE OR REPLACE FUNCTION pgstac.xyzsearch(_x integer, _y integer, _z integer, queryhash text, fields jsonb DEFAULT NULL::jsonb, _scanlimit integer DEFAULT 10000, _limit integer DEFAULT 100, _timelimit interval DEFAULT '00:00:05'::interval, exitwhenfull boolean DEFAULT true, skipcovered boolean DEFAULT true)
    RETURNS jsonb
    LANGUAGE sql
AS $function$
    SELECT geometrysearch(
        st_transform(tileenvelope(_z, _x, _y), 4326),
        queryhash,
        fields,
        _scanlimit,
        _limit,
        _timelimit,
        exitwhenfull,
        skipcovered
    );
$function$
;



-- Record version 0.3.4 in the migrations table.
INSERT INTO migrations (version)
VALUES ('0.3.4');
Loading

0 comments on commit 0a1a655

Please sign in to comment.