Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix handling of nested polygons with --update mode #398

Merged
merged 5 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions db/deploy/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# PgOSM Flex SQL deploy scripts

The scripts in this folder are executed during PgOSM Flex initialization via
the `prepare_osm_schema()` function in `docker/db.py`.
When files are added to or removed from this folder, that function must be
updated to match.
5 changes: 5 additions & 0 deletions db/deploy/replication_functions.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
/*
Creates functions used for maintaining data when --replication is used.

These functions are also used by the `--update append` mode of
PgOSM Flex.
*/
BEGIN;


Expand Down
48 changes: 40 additions & 8 deletions docker/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,10 @@ def log_pg_details():


def prepare_pgosm_db(skip_qgis_style, db_path, import_mode, schema_name):
"""Runs through series of steps to prepare database for PgOSM.
"""Runs through steps to prepare the target database for PgOSM Flex.

Includes additional preparation for the --replication and --update append
modes.

Parameters
--------------------------
Expand Down Expand Up @@ -245,6 +248,9 @@ def prepare_pgosm_db(skip_qgis_style, db_path, import_mode, schema_name):
schema_name=schema_name)
run_insert_pgosm_road(db_path=db_path, schema_name=schema_name)

if import_mode.replication_update or import_mode.update == 'append':
osm2pgsql_replication_start()


def start_import(pgosm_region, pgosm_date, srid, language, layerset, git_info,
osm2pgsql_version, import_mode, schema_name, input_file):
Expand Down Expand Up @@ -477,7 +483,7 @@ def get_db_conn(conn_string):
return conn


def pgosm_after_import(flex_path):
def pgosm_after_import(flex_path: str) -> bool:
"""Runs post-processing SQL via Lua script.

Layerset logic is established via environment variable, must happen
Expand Down Expand Up @@ -508,17 +514,38 @@ def pgosm_after_import(flex_path):


def pgosm_nested_admin_polygons(flex_path: str, schema_name: str):
"""Runs stored procedure to calculate nested admin polygons via psql.
"""Runs two stored procedures to calculate nested admin polygons via psql.

Parameters
----------------------
flex_path : str
schema_name : str
"""
sql_raw = f'CALL {schema_name}.build_nested_admin_polygons();'
# Populate the table
sql_raw_1 = f'CALL {schema_name}.populate_place_polygon_nested();'

conn_string = os.environ['PGOSM_CONN']
cmds = ['psql', '-d', conn_string, '-c', sql_raw]
cmds = ['psql', '-d', conn_string, '-c', sql_raw_1]
LOGGER.info('Populating place_polygon_nested table (osm.populate_place_polygon_nested() )')
output = subprocess.run(cmds,
text=True,
cwd=flex_path,
check=False,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
LOGGER.info(f'Nested polygon output: \n {output.stdout}')

if output.returncode != 0:
err_msg = f'Failed to populate nested polygon data. Return code: {output.returncode}'
LOGGER.error(err_msg)
sys.exit(f'{err_msg} - Check the log output for details.')


# Build the data
sql_raw_2 = f' CALL {schema_name}.build_nested_admin_polygons();'

conn_string = os.environ['PGOSM_CONN']
cmds = ['psql', '-d', conn_string, '-c', sql_raw_2]
LOGGER.info('Building nested polygons... (this can take a while)')
output = subprocess.run(cmds,
text=True,
Expand All @@ -537,18 +564,23 @@ def pgosm_nested_admin_polygons(flex_path: str, schema_name: str):

def osm2pgsql_replication_start():
    """Run the pre-update step that clears FKs which would block data updates.

    Calls the `osm.append_data_start()` procedure over a connection obtained
    from `get_db_conn()`.

    Required for both `--replication` (osm2pgsql-replication) and
    `--update append` modes.
    """
    # The procedure keeps "append" in its name on purpose: it serves both
    # osm2pgsql --append and osm2pgsql-replication, so it was not renamed.
    prep_sql = 'CALL osm.append_data_start();'
    LOGGER.info('Prep database to allow data updates.')

    with get_db_conn(conn_string=connection_string()) as db_conn:
        db_conn.cursor().execute(prep_sql)


def osm2pgsql_replication_finish(skip_nested):
"""Runs post-replication step to put FKs back and refresh materialied views.
def osm2pgsql_replication_finish(skip_nested: bool):
"""Runs post-replication step to refresh materialized views and rebuild
nested data when appropriate.

Only needed for `--replication`, not used for `--update append` mode.

Parameters
---------------------
Expand Down
22 changes: 14 additions & 8 deletions docker/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,9 @@ def verify_checksum(md5_file: str, path: str):
logger.debug('md5sum validated')


def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
layerset_path, replication, schema_name):
def set_env_vars(region: str, subregion: str, srid: str, language: str,
pgosm_date: str, layerset: str,
layerset_path: str, schema_name: str, skip_nested: bool):
"""Sets environment variables needed by PgOSM Flex. Also creates DB
record in `osm.pgosm_flex` table.

Expand All @@ -122,11 +123,11 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
language : str
pgosm_date : str
layerset : str
Name of layerset matching the INI filename.
layerset_path : str
str when set, or None
replication : bool
Indicates when osm2pgsql-replication is used
schema_name : str
skip_nested : bool
"""
logger = logging.getLogger('pgosm-flex')
logger.debug('Ensuring env vars are not set from prior run')
Expand Down Expand Up @@ -159,6 +160,7 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
pgosm_region = get_region_combined(region, subregion)
logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}')

os.environ['SKIP_NESTED'] = str(skip_nested)


def get_region_combined(region: str, subregion: str) -> str:
Expand Down Expand Up @@ -225,7 +227,7 @@ def get_git_info(tag_only: bool=False) -> str:


def unset_env_vars():
"""Unsets environment variables used by PgOSM Flex.
"""Unset environment variables used by PgOSM Flex.

Does not pop POSTGRES_DB on purpose to allow non-Docker operation.
"""
Expand All @@ -239,6 +241,7 @@ def unset_env_vars():
os.environ.pop('PGOSM_CONN', None)
os.environ.pop('PGOSM_CONN_PG', None)
os.environ.pop('SCHEMA_NAME', None)
os.environ.pop('SKIP_NESTED', None)


class ImportMode():
Expand Down Expand Up @@ -310,17 +313,17 @@ def okay_to_run(self, prior_import: dict) -> bool:
"""
self.logger.debug(f'Checking if it is okay to run...')
if self.force:
self.logger.warning(f'Using --force, kiss existing data goodbye')
self.logger.warning('Using --force, kiss existing data goodbye.')
return True

# If no prior imports, do not require force
if len(prior_import) == 0:
self.logger.debug(f'No prior import found, okay to proceed.')
self.logger.debug('No prior import found, okay to proceed.')
return True

prior_replication = prior_import['replication']

# Check git version against latest.
# Check PgOSM version using Git tags
# If current version is lower than prior version from latest import, stop.
prior_import_version = prior_import['pgosm_flex_version_no_hash']
git_tag = get_git_info(tag_only=True)
Expand All @@ -345,6 +348,9 @@ def okay_to_run(self, prior_import: dict) -> bool:
self.logger.debug('Okay to proceed with replication')
return True

if self.update == 'append':
return True

msg = 'Prior data exists in the osm schema and --force was not used.'
self.logger.error(msg)
return False
Expand Down
11 changes: 7 additions & 4 deletions docker/pgosm_flex.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ def run_pgosm_flex(ram, region, subregion, debug, force,
region = input_file

helpers.set_env_vars(region, subregion, srid, language, pgosm_date,
layerset, layerset_path, replication, schema_name)
layerset, layerset_path, schema_name,
skip_nested)
db.wait_for_postgres()
if force and db.pg_conn_parts()['pg_host'] == 'localhost':
msg = 'Using --force with the built-in database is unnecessary.'
Expand Down Expand Up @@ -267,7 +268,6 @@ def run_replication_update(skip_nested, flex_path):
"""
logger = logging.getLogger('pgosm-flex')
conn_string = db.connection_string()
db.osm2pgsql_replication_start()

update_cmd = """
osm2pgsql-replication update -d $PGOSM_CONN \
Expand Down Expand Up @@ -531,10 +531,13 @@ def run_post_processing(flex_path, skip_nested, import_mode, schema_name):
logger = logging.getLogger('pgosm-flex')

if not import_mode.run_post_sql:
logger.info('Running with --update append: Skipping post-processing SQL')
msg = 'Running with --update append: Skipping post-processing SQL.'
msg += ' Running osm2pgsql_replication_finish() instead.'
logger.info(msg)
db.osm2pgsql_replication_finish(skip_nested=skip_nested)
return True

post_processing_sql = db.pgosm_after_import(flex_path)
post_processing_sql = db.pgosm_after_import(flex_path=flex_path)

if skip_nested:
logger.info('Skipping calculating nested polygons')
Expand Down
8 changes: 4 additions & 4 deletions docker/tests/test_geofabrik.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def setUp(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=None,
replication=False,
schema_name='osm')
schema_name='osm',
skip_nested=True)


def tearDown(self):
Expand All @@ -44,8 +44,8 @@ def test_get_region_filename_returns_region_when_subregion_None(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=None,
replication=False,
schema_name='osm')
schema_name='osm',
skip_nested=True)

result = geofabrik.get_region_filename()
expected = f'{REGION_US}-latest.osm.pbf'
Expand Down
28 changes: 14 additions & 14 deletions docker/tests/test_pgosm_flex.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ def setUp(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=None,
replication=False,
schema_name='osm')
schema_name='osm',
skip_nested=True)


def tearDown(self):
Expand Down Expand Up @@ -91,8 +91,8 @@ def test_get_export_filename_region_only(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=None,
replication=False,
schema_name='osm')
schema_name='osm',
skip_nested=True)

input_file = None
result = pgosm_flex.get_export_filename(input_file)
Expand All @@ -109,8 +109,8 @@ def test_layerset_include_place_returns_boolean(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=layerset_path,
replication=False,
schema_name='osm')
schema_name='osm',
skip_nested=True)

paths = pgosm_flex.get_paths()
result = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])
Expand All @@ -128,8 +128,8 @@ def test_layerset_include_place_returns_True_with_default_layerset(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=layerset_path,
replication=False,
schema_name='osm')
schema_name='osm',
skip_nested=True)

paths = pgosm_flex.get_paths()
actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])
Expand All @@ -147,8 +147,8 @@ def test_layerset_include_place_returns_false_when_place_false_in_ini(self):
pgosm_date=PGOSM_DATE,
layerset=layerset,
layerset_path=layerset_path,
replication=False,
schema_name='osm')
schema_name='osm',
skip_nested=True)

paths = pgosm_flex.get_paths()
actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])
Expand All @@ -166,8 +166,8 @@ def test_layerset_include_place_returns_false_when_place_missing_in_ini(self):
pgosm_date=PGOSM_DATE,
layerset=layerset,
layerset_path=layerset_path,
replication=False,
schema_name='osm')
schema_name='osm',
skip_nested=True)

paths = pgosm_flex.get_paths()
actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])
Expand All @@ -185,8 +185,8 @@ def test_layerset_include_place_returns_true_when_place_true_in_ini(self):
pgosm_date=PGOSM_DATE,
layerset=layerset,
layerset_path=layerset_path,
replication=False,
schema_name='osm')
schema_name='osm',
skip_nested=True)

paths = pgosm_flex.get_paths()
actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])
Expand Down
Loading
Loading