From 11275b0331d64e4135705c4a572e3d4fa592b3c6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 18 Nov 2023 17:16:36 +0100 Subject: [PATCH 1/5] switch replication tests to using pytest-httpserver --- README.md | 10 +- test/test_replication.py | 585 +++++++++++++++++++-------------------- 2 files changed, 284 insertions(+), 311 deletions(-) diff --git a/README.md b/README.md index 022dadfa..1ead9f7e 100644 --- a/README.md +++ b/README.md @@ -74,13 +74,15 @@ They are mostly ports of the examples in Libosmium and osmium-contrib. ## Testing -There is a small test suite in the test directory. This provides regression +There is a small test suite in the test directory. This provides unit test for the python bindings, it is not meant to be a test suite for Libosmium. -You'll need the Python `pytest` module. On Debian/Ubuntu install the package -`python3-pytest`. +Testing requires `pytest` and `pytest-httpserver`. On Debian/Ubuntu install +the dependencies with: -The suite can be run with: + sudo apt-get install python3-pytest python3-pytest-httpserver + +The test suite can be run with: pytest test diff --git a/test/test_replication.py b/test/test_replication.py index 593cd0ab..defca2fa 100644 --- a/test/test_replication.py +++ b/test/test_replication.py @@ -2,13 +2,13 @@ # # This file is part of Pyosmium. # -# Copyright (C) 2022 Sarah Hoffmann. -from io import BytesIO +# Copyright (C) 2023 Sarah Hoffmann. +import time from textwrap import dedent -from urllib.error import URLError import pytest -import requests.exceptions + +from werkzeug.wrappers import Response from helpers import mkdate, CountingHandler @@ -16,15 +16,6 @@ import osmium.replication.server as rserv import osmium.replication -class RequestsResponses(BytesIO): - - def __init__(self, bytes): - super(RequestsResponses, self).__init__(bytes) - self.content = bytes - - def iter_lines(self): - return self.readlines() - @pytest.mark.parametrize("inp,outp", [ (None, 'https://text.org/state.txt'), @@ -60,297 +51,277 @@ def test_get_newest_change_from_file(tmp_path): assert val == mkdate(2018, 10, 29, 3, 56, 7) -class TestReplication: - - @pytest.fixture(params=["requests", "urllib"], autouse=True) - def setup_mocks(self, request, monkeypatch): - self.url_requests = [] - self.url_exception = None - if request.param == "requests": - # Default use of the requests library. Simply patch the get method. - def mock_get(*args, **kwargs): - if self.url_exception is not None: - raise self.url_exception - assert self.url_requests - return RequestsResponses(self.url_requests.pop(0)) - monkeypatch.setattr(osmium.replication.server.requests.Session, "get", mock_get) - - def mk_server(*args, **kwargs): - return rserv.ReplicationServer(*args, **kwargs) - self.mk_replication_server = mk_server - - elif request.param == "urllib": - def mock_get(*args, **kwargs): - if self.url_exception is not None: - raise self.url_exception - assert self.url_requests - return BytesIO(self.url_requests.pop(0)) - def mk_server(*args, **kwargs): - server = rserv.ReplicationServer(*args, **kwargs) - server.open_url = mock_get - return server - self.mk_replication_server = mk_server - - - def set_result(self, content): - self.url_requests = [dedent(content).encode()] - - - def set_script(self, files): - self.url_requests = [dedent(s).encode() for s in files] - - - def test_get_state_valid(self): - self.set_result("""\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=2017-08-26T11\\:04\\:02Z - txnReadyList= - txnMax=1219304113 - txnActiveList=1219303583,1219304054,1219304104""") - - res = self.mk_replication_server("https://test.io").get_state_info() - - assert res is not None - assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2) - assert res.sequence == 2594669 - - - def test_get_state_sequence_cut(self): - self.set_script(("""\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=259""", - """\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=2017-08-26T11\\:04\\:02Z""")) - - res = self.mk_replication_server("https://test.io").get_state_info() - - assert res is not None - assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2) - assert res.sequence == 2594669 - - - def test_get_state_date_cut(self): - self.set_script(("""\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=2017-08-2""", - """\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=2017-08-26T11\\:04\\:02Z""")) - - res = self.mk_replication_server("https://test.io").get_state_info() - - assert res is not None - assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2) - assert res.sequence == 2594669 - - - def test_get_state_timestamp_cut(self): - self.set_script(("""\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=""", - """\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=2017-08-26T11\\:04\\:02Z""")) - - res = self.mk_replication_server("https://test.io").get_state_info() - - assert res is not None - assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2) - assert res.sequence == 2594669 - - - def test_get_state_too_many_retries(self): - self.set_script(("""\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=""", - """\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=""", - """\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=""", - """\ - #Sat Aug 26 11:04:04 UTC 2017 - txnMaxQueried=1219304113 - sequenceNumber=2594669 - timestamp=2017-08-26T11\\:04\\:02Z""")) - - res = self.mk_replication_server("https://test.io").get_state_info() - - assert res is None - - - @pytest.mark.parametrize("error", [URLError(reason='Mock'), - requests.exceptions.ConnectTimeout]) - def test_get_state_server_timeout(self, error): - self.url_exception = error - - svr = self.mk_replication_server("https://test.io") - assert svr.get_state_info() is None - - - def test_apply_diffs_count(self): - self.set_script(("""\ - sequenceNumber=100 - timestamp=2017-08-26T11\\:04\\:02Z - """, """ - n1 - w1 - r1 - """)) - svr = self.mk_replication_server("https://test.io", "opl") - - h = CountingHandler() - assert 100 == svr.apply_diffs(h, 100, 10000) - - assert h.counts == [1, 1, 1, 0] - - - def test_apply_diffs_without_simplify(self): - self.set_script(("""\ - sequenceNumber=100 - timestamp=2017-08-26T11\\:04\\:02Z - """, """ - n1 v23 - n1 v24 - w1 - r1 - """)) - svr = self.mk_replication_server("https://test.io", "opl") - - h = CountingHandler() - assert 100 == svr.apply_diffs(h, 100, 10000, simplify=False) - assert [2, 1, 1, 0] == h.counts - - - def test_apply_diffs_with_simplify(self): - self.set_script(("""\ - sequenceNumber=100 - timestamp=2017-08-26T11\\:04\\:02Z - """, """ - n1 v23 - n1 v24 - w1 - r1 - """)) - svr = self.mk_replication_server("https://test.io", "opl") - - h = CountingHandler() - assert 100 == svr.apply_diffs(h, 100, 10000, simplify=True) - assert [1, 1, 1, 0] == h.counts - - - def test_apply_with_location(self): - self.set_script(("""\ - sequenceNumber=100 - timestamp=2017-08-26T11\\:04\\:02Z - """, """ - n1 x10.0 y23.0 - w1 Nn1,n2 - """)) - svr = self.mk_replication_server("https://test.io", "opl") - - class Handler(CountingHandler): - def way(self, w): - self.counts[1] += 1 - assert 2 == len(w.nodes) - assert 1 == w.nodes[0].ref - assert 10 == w.nodes[0].location.lon - assert 23 == w.nodes[0].location.lat - assert 2 == w.nodes[1].ref - assert not w.nodes[1].location.valid() - - h = Handler() - assert 100 == svr.apply_diffs(h, 100, 10000, idx="flex_mem") - - assert h.counts == [1, 1, 0, 0] - - - def test_apply_reader_without_simplify(self): - self.set_script(("""\ - sequenceNumber=100 - timestamp=2017-08-26T11\\:04\\:02Z - """, """ - n1 v23 - n1 v24 - w1 - r1 - """)) - svr = self.mk_replication_server("https://test.io", "opl") - - h = CountingHandler() - - diffs = svr.collect_diffs(100, 100000) - assert diffs is not None - - diffs.reader.apply(h, simplify=False) - assert [2, 1, 1, 0] == h.counts - - - def test_apply_reader_with_simplify(self): - self.set_script(("""\ - sequenceNumber=100 - timestamp=2017-08-26T11\\:04\\:02Z - """, """ - n1 v23 - n1 v24 - w1 - r1 - """)) - svr = self.mk_replication_server("https://test.io", "opl") - - h = CountingHandler() - diffs = svr.collect_diffs(100, 100000) - assert diffs is not None - - diffs.reader.apply(h, simplify=True) - assert [1, 1, 1, 0] == h.counts - - - def test_apply_reader_with_location(self): - self.set_script(("""\ - sequenceNumber=100 - timestamp=2017-08-26T11\\:04\\:02Z - """, """ - n1 x10.0 y23.0 - w1 Nn1,n2 - """)) - svr = self.mk_replication_server("https://test.io", "opl") - - class Handler(CountingHandler): - def way(self, w): - self.counts[1] += 1 - assert 2 == len(w.nodes) - assert 1 == w.nodes[0].ref - assert 10 == w.nodes[0].location.lon - assert 23 == w.nodes[0].location.lat - assert 2 == w.nodes[1].ref - assert not w.nodes[1].location.valid() - - h = Handler() - diffs = svr.collect_diffs(100, 100000) - assert diffs is not None - - diffs.reader.apply(h, idx="flex_mem") - - assert h.counts == [1, 1, 0, 0] +def test_get_state_valid(httpserver): + httpserver.expect_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=2017-08-26T11\\:04\\:02Z + txnReadyList= + txnMax=1219304113 + txnActiveList=1219303583,1219304054,1219304104""") + + res = rserv.ReplicationServer(httpserver.url_for('')).get_state_info() + + assert res is not None + assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2) + assert res.sequence == 2594669 + + +def test_get_state_sequence_cut(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=259""") + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=2017-08-26T11\\:04\\:02Z""") + + res = rserv.ReplicationServer(httpserver.url_for('')).get_state_info() + + assert res is not None + assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2) + assert res.sequence == 2594669 + + +def test_get_state_date_cut(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=2017-08-2""") + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=2017-08-26T11\\:04\\:02Z""") + + res = rserv.ReplicationServer(httpserver.url_for('')).get_state_info() + + assert res is not None + assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2) + assert res.sequence == 2594669 + + +def test_get_state_timestamp_cut(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=""") + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=2017-08-26T11\\:04\\:02Z""") + + res = rserv.ReplicationServer(httpserver.url_for('')).get_state_info() + + assert res is not None + assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2) + assert res.sequence == 2594669 + + +def test_get_state_too_many_retries(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=""") + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=""") + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=""") + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=2017-08-26T11\\:04\\:02Z""") + + res = rserv.ReplicationServer(httpserver.url_for('')).get_state_info() + + assert res is None + + +def test_get_state_server_timeout(httpserver): + def sleeping(request): + time.sleep(2) + return Response("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=2017-08-26T11\\:04\\:02Z + txnReadyList= + txnMax=1219304113 + txnActiveList=1219303583,1219304054,1219304104""") + + httpserver.expect_request("/state.txt").respond_with_handler(sleeping) + + svr = rserv.ReplicationServer(httpserver.url_for('')) + svr.set_request_parameter('timeout', 1) + + res = svr.get_state_info() + + assert res is None + + +def test_apply_diffs_count(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=100 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 + w1 + r1 + """)) + svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") + + h = CountingHandler() + assert 100 == svr.apply_diffs(h, 100, 10000) + + assert h.counts == [1, 1, 1, 0] + + +def test_apply_diffs_without_simplify(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=100 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 v23 + n1 v24 + w1 + r1 + """)) + svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") + + h = CountingHandler() + assert 100 == svr.apply_diffs(h, 100, 10000, simplify=False) + assert [2, 1, 1, 0] == h.counts + + +def test_apply_diffs_with_simplify(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=100 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 v23 + n1 v24 + w1 + r1 + """)) + svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") + + h = CountingHandler() + assert 100 == svr.apply_diffs(h, 100, 10000, simplify=True) + assert [1, 1, 1, 0] == h.counts + + +def test_apply_with_location(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=100 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 x10.0 y23.0 + w1 Nn1,n2 + """)) + svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") + + class Handler(CountingHandler): + def way(self, w): + self.counts[1] += 1 + assert 2 == len(w.nodes) + assert 1 == w.nodes[0].ref + assert 10 == w.nodes[0].location.lon + assert 23 == w.nodes[0].location.lat + assert 2 == w.nodes[1].ref + assert not w.nodes[1].location.valid() + + h = Handler() + assert 100 == svr.apply_diffs(h, 100, 10000, idx="flex_mem") + + assert h.counts == [1, 1, 0, 0] + + +def test_apply_reader_without_simplify(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=100 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 v23 + n1 v24 + w1 + r1 + """)) + svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") + + h = CountingHandler() + + diffs = svr.collect_diffs(100, 100000) + assert diffs is not None + + diffs.reader.apply(h, simplify=False) + assert [2, 1, 1, 0] == h.counts + + +def test_apply_reader_with_simplify(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=100 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 v23 + n1 v24 + w1 + r1 + """)) + svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") + + h = CountingHandler() + diffs = svr.collect_diffs(100, 100000) + assert diffs is not None + + diffs.reader.apply(h, simplify=True) + assert [1, 1, 1, 0] == h.counts + + +def test_apply_reader_with_location(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=100 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 x10.0 y23.0 + w1 Nn1,n2 + """)) + svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") + + class Handler(CountingHandler): + def way(self, w): + self.counts[1] += 1 + assert 2 == len(w.nodes) + assert 1 == w.nodes[0].ref + assert 10 == w.nodes[0].location.lon + assert 23 == w.nodes[0].location.lat + assert 2 == w.nodes[1].ref + assert not w.nodes[1].location.valid() + + h = Handler() + diffs = svr.collect_diffs(100, 100000) + assert diffs is not None + + diffs.reader.apply(h, idx="flex_mem") + + assert h.counts == [1, 1, 0, 0] From e9f456391260ae73a6f2885b6bcda45d30912478 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 18 Nov 2023 17:21:18 +0100 Subject: [PATCH 2/5] use context managment when testing replication --- test/test_replication.py | 62 +++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/test/test_replication.py b/test/test_replication.py index defca2fa..f431848e 100644 --- a/test/test_replication.py +++ b/test/test_replication.py @@ -165,10 +165,10 @@ def sleeping(request): httpserver.expect_request("/state.txt").respond_with_handler(sleeping) - svr = rserv.ReplicationServer(httpserver.url_for('')) - svr.set_request_parameter('timeout', 1) + with rserv.ReplicationServer(httpserver.url_for('')) as svr: + svr.set_request_parameter('timeout', 1) - res = svr.get_state_info() + res = svr.get_state_info() assert res is None @@ -183,12 +183,11 @@ def test_apply_diffs_count(httpserver): w1 r1 """)) - svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") - h = CountingHandler() - assert 100 == svr.apply_diffs(h, 100, 10000) - - assert h.counts == [1, 1, 1, 0] + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = CountingHandler() + assert 100 == svr.apply_diffs(h, 100, 10000) + assert h.counts == [1, 1, 1, 0] def test_apply_diffs_without_simplify(httpserver): @@ -202,11 +201,11 @@ def test_apply_diffs_without_simplify(httpserver): w1 r1 """)) - svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") - h = CountingHandler() - assert 100 == svr.apply_diffs(h, 100, 10000, simplify=False) - assert [2, 1, 1, 0] == h.counts + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = CountingHandler() + assert 100 == svr.apply_diffs(h, 100, 10000, simplify=False) + assert [2, 1, 1, 0] == h.counts def test_apply_diffs_with_simplify(httpserver): @@ -220,11 +219,11 @@ def test_apply_diffs_with_simplify(httpserver): w1 r1 """)) - svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") - h = CountingHandler() - assert 100 == svr.apply_diffs(h, 100, 10000, simplify=True) - assert [1, 1, 1, 0] == h.counts + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = CountingHandler() + assert 100 == svr.apply_diffs(h, 100, 10000, simplify=True) + assert [1, 1, 1, 0] == h.counts def test_apply_with_location(httpserver): @@ -236,7 +235,6 @@ def test_apply_with_location(httpserver): n1 x10.0 y23.0 w1 Nn1,n2 """)) - svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") class Handler(CountingHandler): def way(self, w): @@ -249,9 +247,9 @@ def way(self, w): assert not w.nodes[1].location.valid() h = Handler() - assert 100 == svr.apply_diffs(h, 100, 10000, idx="flex_mem") - - assert h.counts == [1, 1, 0, 0] + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + assert 100 == svr.apply_diffs(h, 100, 10000, idx="flex_mem") + assert h.counts == [1, 1, 0, 0] def test_apply_reader_without_simplify(httpserver): @@ -265,13 +263,12 @@ def test_apply_reader_without_simplify(httpserver): w1 r1 """)) - svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") - h = CountingHandler() + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = CountingHandler() + diffs = svr.collect_diffs(100, 100000) - diffs = svr.collect_diffs(100, 100000) assert diffs is not None - diffs.reader.apply(h, simplify=False) assert [2, 1, 1, 0] == h.counts @@ -287,12 +284,12 @@ def test_apply_reader_with_simplify(httpserver): w1 r1 """)) - svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") - h = CountingHandler() - diffs = svr.collect_diffs(100, 100000) - assert diffs is not None + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = CountingHandler() + diffs = svr.collect_diffs(100, 100000) + assert diffs is not None diffs.reader.apply(h, simplify=True) assert [1, 1, 1, 0] == h.counts @@ -306,7 +303,6 @@ def test_apply_reader_with_location(httpserver): n1 x10.0 y23.0 w1 Nn1,n2 """)) - svr = rserv.ReplicationServer(httpserver.url_for(''), "opl") class Handler(CountingHandler): def way(self, w): @@ -318,10 +314,10 @@ def way(self, w): assert 2 == w.nodes[1].ref assert not w.nodes[1].location.valid() - h = Handler() - diffs = svr.collect_diffs(100, 100000) - assert diffs is not None + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = Handler() + diffs = svr.collect_diffs(100, 100000) + assert diffs is not None diffs.reader.apply(h, idx="flex_mem") - assert h.counts == [1, 1, 0, 0] From df5fb8172e0b50f70e471845105276ec61a12208 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 18 Nov 2023 17:38:30 +0100 Subject: [PATCH 3/5] move pyosmium-get_changes tests to pytest-httpserver --- test/test_pyosmium_get_changes.py | 72 ++++++++++--------------------- 1 file changed, 23 insertions(+), 49 deletions(-) diff --git a/test/test_pyosmium_get_changes.py b/test/test_pyosmium_get_changes.py index 36be98c2..595b9a8d 100644 --- a/test/test_pyosmium_get_changes.py +++ b/test/test_pyosmium_get_changes.py @@ -2,10 +2,9 @@ # # This file is part of Pyosmium. # -# Copyright (C) 2022 Sarah Hoffmann. +# Copyright (C) 2023 Sarah Hoffmann. """ Tests for the pyosmium-get-changes script. """ -from io import BytesIO from pathlib import Path from textwrap import dedent @@ -18,101 +17,76 @@ import cookielib as cookiejarlib -class RequestsResponses(BytesIO): - - def __init__(self, bytes): - super(RequestsResponses, self).__init__(bytes) - self.content = bytes - - def iter_lines(self): - return self.readlines() - - class TestPyosmiumGetChanges: @pytest.fixture(autouse=True) - def setUp(self, monkeypatch): + def setup(self): self.script = dict() filename = (Path(__file__) / ".." / ".." / "tools"/ "pyosmium-get-changes").resolve() with filename.open("rb") as f: exec(compile(f.read(), str(filename), 'exec'), self.script) - self.urls = dict() - - - @pytest.fixture - def mock_requests(self, monkeypatch): - def mock_get(session, url, **kwargs): - return RequestsResponses(self.urls[url]) - monkeypatch.setattr(osmium.replication.server.requests.Session, "get", mock_get) - - - def url(self, url, result): - self.urls[url] = dedent(result).encode() - def main(self, *args): - return self.script['main'](args) + def main(self, httpserver, *args): + return self.script['main'](['--server', httpserver.url_for('')] + list(args)) - def test_init_id(self, capsys): - assert 0 == self.main('-I', '453') + def test_init_id(self, capsys, httpserver): + assert 0 == self.main(httpserver, '-I', '453') output = capsys.readouterr().out.strip() assert output == '453' - def test_init_date(self, capsys, mock_requests): - self.url('https://planet.osm.org/replication/minute//state.txt', - """\ + def test_init_date(self, capsys, httpserver): + httpserver.expect_request('/state.txt').respond_with_data(dedent("""\ sequenceNumber=100 timestamp=2017-08-26T11\\:04\\:02Z - """) - self.url('https://planet.osm.org/replication/minute//000/000/000.state.txt', - """\ + """)) + httpserver.expect_request('/000/000/000.state.txt').respond_with_data(dedent("""\ sequenceNumber=0 timestamp=2016-08-26T11\\:04\\:02Z - """) - assert 0 == self.main('-D', '2015-12-24T08:08:08Z') + """)) + assert 0 == self.main(httpserver, '-D', '2015-12-24T08:08:08Z') output = capsys.readouterr().out.strip() assert output == '-1' - def test_init_to_file(self, tmp_path): + def test_init_to_file(self, tmp_path, httpserver): fname = tmp_path / 'db.seq' - assert 0 == self.main('-I', '453', '-f', str(fname)) + assert 0 == self.main(httpserver, '-I', '453', '-f', str(fname)) assert fname.read_text() == '453' - def test_init_from_seq_file(self, tmp_path): + def test_init_from_seq_file(self, tmp_path, httpserver): fname = tmp_path / 'db.seq' fname.write_text('453') - assert 0 == self.main('-f', str(fname)) + assert 0 == self.main(httpserver, '-f', str(fname)) assert fname.read_text() == '453' - def test_init_date_with_cookie(self, capsys, tmp_path, mock_requests): - self.url('https://planet.osm.org/replication/minute//state.txt', - """\ + def test_init_date_with_cookie(self, capsys, tmp_path, httpserver): + httpserver.expect_request('/state.txt').respond_with_data(dedent("""\ sequenceNumber=100 timestamp=2017-08-26T11\\:04\\:02Z - """) - self.url('https://planet.osm.org/replication/minute//000/000/000.state.txt', - """\ + """)) + httpserver.expect_request('/000/000/000.state.txt').respond_with_data(dedent("""\ sequenceNumber=0 timestamp=2016-08-26T11\\:04\\:02Z - """) + """)) fname = tmp_path / 'my.cookie' cookie_jar = cookiejarlib.MozillaCookieJar(str(fname)) cookie_jar.save() - assert 0 == self.main('--cookie', str(fname), '-D', '2015-12-24T08:08:08Z') + assert 0 == self.main(httpserver, '--cookie', str(fname), + '-D', '2015-12-24T08:08:08Z') output = capsys.readouterr().out.strip() From 0fe27b0b29d59cdfbe5133acf688fb8ab3658c68 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 18 Nov 2023 21:48:10 +0100 Subject: [PATCH 4/5] raise exceptions on non-200 HTTP returns and retry implicitly Requests does not throw on HTTP errors, so pyosmium was just consuming the error message. Fix that. Also add a retry mechanism for transient errors. The list of transient status codes is the same as the one that curl uses. --- src/osmium/replication/server.py | 19 +++-- test/test_replication.py | 119 +++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 4 deletions(-) diff --git a/src/osmium/replication/server.py b/src/osmium/replication/server.py index 21307be2..dfa262ab 100644 --- a/src/osmium/replication/server.py +++ b/src/osmium/replication/server.py @@ -7,7 +7,6 @@ """ Helper functions to communicate with replication servers. """ from typing import NamedTuple, Optional, Any, Iterator, cast, Dict, Mapping, Tuple -import requests import urllib.request as urlrequest from urllib.error import URLError import datetime as dt @@ -15,6 +14,10 @@ from contextlib import contextmanager from math import ceil +import requests +from requests.adapters import HTTPAdapter +from urllib3.util import Retry + from osmium import MergeInputReader, BaseHandler from osmium import io as oio from osmium import version @@ -52,6 +55,8 @@ def __init__(self, url: str, diff_type: str = 'osc.gz') -> None: self.diff_type = diff_type self.extra_request_params: dict[str, Any] = dict(timeout=60, stream=True) self.session: Optional[requests.Session] = None + self.retry = Retry(total=3, backoff_factor=0.5, allowed_methods={'GET'}, + status_forcelist=[408, 429, 500, 502, 503, 504]) def close(self) -> None: """ Close any open connection to the replication server. @@ -62,6 +67,8 @@ def close(self) -> None: def __enter__(self) -> 'ReplicationServer': self.session = requests.Session() + self.session.mount('http://', HTTPAdapter(max_retries=self.retry)) + self.session.mount('https://', HTTPAdapter(max_retries=self.retry)) return self def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: @@ -97,6 +104,8 @@ def open_url(self, url: urlrequest.Request) -> Any: @contextmanager def _get_url_with_session() -> Iterator[requests.Response]: with requests.Session() as session: + session.mount('http://', HTTPAdapter(max_retries=self.retry)) + session.mount('https://', HTTPAdapter(max_retries=self.retry)) request = session.get(url.get_full_url(), **get_params) yield request @@ -133,7 +142,7 @@ def collect_diffs(self, start_id: int, max_size: int = 1024) -> Optional[Downloa try: diffdata = self.get_diff_block(current_id) except: - LOG.debug("Error during diff download. Bailing out.") + LOG.error("Error during diff download. Bailing out.") diffdata = '' if len(diffdata) == 0: if start_id == current_id: @@ -348,6 +357,7 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona with self.open_url(self.make_request(self.get_state_url(seq))) as response: if hasattr(response, 'iter_lines'): # generated by requests + response.raise_for_status() lines = response.iter_lines() else: lines = response @@ -372,7 +382,7 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona ts = ts.replace(tzinfo=dt.timezone.utc) except (URLError, IOError) as err: - LOG.debug("Loading state info %s failed with: %s", seq, str(err)) + LOG.debug("Loading state info failed with: %s", str(err)) return None if ts is not None and next_seq is not None: @@ -382,12 +392,13 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona def get_diff_block(self, seq: int) -> str: """ Downloads the diff with the given sequence number and returns - it as a byte sequence. Throws a :code:`urllib.error.HTTPError` + it as a byte sequence. Throws an :code:`requests.HTTPError` if the file cannot be downloaded. """ with self.open_url(self.make_request(self.get_diff_url(seq))) as resp: if hasattr(resp, 'content'): # generated by requests + resp.raise_for_status() return cast(str, resp.content) # generated by urllib.request diff --git a/test/test_replication.py b/test/test_replication.py index f431848e..023c9cdb 100644 --- a/test/test_replication.py +++ b/test/test_replication.py @@ -3,6 +3,7 @@ # This file is part of Pyosmium. # # Copyright (C) 2023 Sarah Hoffmann. +import logging import time from textwrap import dedent @@ -124,6 +125,35 @@ def test_get_state_timestamp_cut(httpserver): assert res.sequence == 2594669 +def test_get_state_permanent_error(httpserver, caplog): + httpserver.expect_request('/state.txt').respond_with_data('stuff', status=404) + + with caplog.at_level(logging.DEBUG): + res = rserv.ReplicationServer(httpserver.url_for('')).get_state_info() + + assert res is None + assert "Loading state info failed" in caplog.text + + +def test_get_state_transient_error(httpserver): + httpserver.expect_ordered_request('/state.txt').respond_with_data('stuff', status=500) + httpserver.expect_ordered_request('/state.txt').respond_with_data('stuff', status=500) + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + #Sat Aug 26 11:04:04 UTC 2017 + txnMaxQueried=1219304113 + sequenceNumber=2594669 + timestamp=2017-08-26T11\\:04\\:02Z + txnReadyList= + txnMax=1219304113 + txnActiveList=1219303583,1219304054,1219304104""") + + res = rserv.ReplicationServer(httpserver.url_for('')).get_state_info() + + assert res is not None + assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2) + assert res.sequence == 2594669 + + def test_get_state_too_many_retries(httpserver): httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ #Sat Aug 26 11:04:04 UTC 2017 @@ -321,3 +351,92 @@ def way(self, w): assert diffs is not None diffs.reader.apply(h, idx="flex_mem") assert h.counts == [1, 1, 0, 0] + + +def test_apply_diffs_permanent_error(httpserver, caplog): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=100 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl')\ + .respond_with_data('not a file', status=404) + + with caplog.at_level(logging.ERROR): + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = CountingHandler() + assert None == svr.apply_diffs(h, 100, 10000) + assert h.counts == [0, 0, 0, 0] + + assert 'Error during diff download' in caplog.text + + +def test_apply_diffs_permanent_error_later_diff(httpserver, caplog): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=101 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 x10.0 y23.0 + w1 Nn1,n2 + """)) + httpserver.expect_ordered_request('/000/000/101.opl')\ + .respond_with_data('not a file', status=404) + + with caplog.at_level(logging.ERROR): + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = CountingHandler() + assert 100 == svr.apply_diffs(h, 100, 10000) + assert h.counts == [1, 1, 0, 0] + + assert 'Error during diff download' in caplog.text + + +def test_apply_diffs_transient_error(httpserver, caplog): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=101 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 x10.0 y23.0 + w1 Nn1,n2 + """)) + httpserver.expect_ordered_request('/000/000/101.opl')\ + .respond_with_data('not a file', status=503) + httpserver.expect_ordered_request('/000/000/101.opl').respond_with_data(dedent("""\ + n2 x10.0 y23.0 + """)) + + with caplog.at_level(logging.ERROR): + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = CountingHandler() + assert 101 == svr.apply_diffs(h, 100, 10000) + assert h.counts == [2, 1, 0, 0] + + assert 'Error during diff download' not in caplog.text + + + + +def test_apply_diffs_transient_error_permanent(httpserver, caplog): + httpserver.expect_ordered_request('/state.txt').respond_with_data("""\ + sequenceNumber=101 + timestamp=2017-08-26T11\\:04\\:02Z + """) + httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\ + n1 x10.0 y23.0 + w1 Nn1,n2 + """)) + for _ in range(4): + httpserver.expect_ordered_request('/000/000/101.opl')\ + .respond_with_data('not a file', status=503) + httpserver.expect_ordered_request('/000/000/101.opl').respond_with_data(dedent("""\ + n2 x10.0 y23.0 + """)) + + with caplog.at_level(logging.ERROR): + with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr: + h = CountingHandler() + assert 100 == svr.apply_diffs(h, 100, 10000) + assert h.counts == [1, 1, 0, 0] + + assert 'Error during diff download' in caplog.text From 795dc0dec14248f8f94534bcdcaddcea8aea76b5 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 18 Nov 2023 21:56:23 +0100 Subject: [PATCH 5/5] add new pytest-httpserver dependency for CI --- .github/actions/run-tests/action.yml | 2 +- .github/workflows/build_wheels.yml | 2 +- .github/workflows/ci.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index c718f531..f381624f 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -4,7 +4,7 @@ runs: using: "composite" steps: - name: Install test requirements - run: pip install pytest shapely + run: pip install pytest pytest-httpserver shapely shell: bash - name: Run tests diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 5ebeabe4..04ace7f3 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -40,7 +40,7 @@ jobs: env: CIBW_ARCHS: native CIBW_SKIP: "pp* *musllinux*" - CIBW_TEST_REQUIRES: pytest shapely + CIBW_TEST_REQUIRES: pytest pytest-httpserver shapely CIBW_TEST_COMMAND: pytest {project}/test CIBW_BUILD_FRONTEND: build CIBW_BEFORE_BUILD_LINUX: yum install -y sparsehash-devel expat-devel boost-devel zlib-devel bzip2-devel lz4-devel diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index db2a813b..bc38ee0b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -182,7 +182,7 @@ jobs: - name: Install prerequisites run: | python -m pip install --upgrade pip - pip install pytest shapely setuptools requests + pip install pytest pytest-httpserver shapely setuptools requests shell: bash - name: Build package