Skip to content

Commit

Permalink
Merge pull request #237 from lonvia/retry-on-500-errors
Browse files Browse the repository at this point in the history
Fix handling of HTTP errors for replication handler
  • Loading branch information
lonvia authored Nov 18, 2023
2 parents 0459fb4 + 795dc0d commit f9ca6c7
Show file tree
Hide file tree
Showing 7 changed files with 406 additions and 333 deletions.
2 changes: 1 addition & 1 deletion .github/actions/run-tests/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ runs:
using: "composite"
steps:
- name: Install test requirements
run: pip install pytest shapely
run: pip install pytest pytest-httpserver shapely
shell: bash

- name: Run tests
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
env:
CIBW_ARCHS: native
CIBW_SKIP: "pp* *musllinux*"
CIBW_TEST_REQUIRES: pytest shapely
CIBW_TEST_REQUIRES: pytest pytest-httpserver shapely
CIBW_TEST_COMMAND: pytest {project}/test
CIBW_BUILD_FRONTEND: build
CIBW_BEFORE_BUILD_LINUX: yum install -y sparsehash-devel expat-devel boost-devel zlib-devel bzip2-devel lz4-devel
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ jobs:
- name: Install prerequisites
run: |
python -m pip install --upgrade pip
pip install pytest shapely setuptools requests
pip install pytest pytest-httpserver shapely setuptools requests
shell: bash

- name: Build package
Expand Down
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,15 @@ They are mostly ports of the examples in Libosmium and osmium-contrib.

## Testing

There is a small test suite in the test directory. This provides regression
There is a small test suite in the test directory. This provides unit
tests for the Python bindings; it is not meant to be a test suite for Libosmium.

You'll need the Python `pytest` module. On Debian/Ubuntu install the package
`python3-pytest`.
Testing requires `pytest` and `pytest-httpserver`. On Debian/Ubuntu install
the dependencies with:

The suite can be run with:
sudo apt-get install python3-pytest python3-pytest-httpserver

The test suite can be run with:

pytest test

Expand Down
19 changes: 15 additions & 4 deletions src/osmium/replication/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@
""" Helper functions to communicate with replication servers.
"""
from typing import NamedTuple, Optional, Any, Iterator, cast, Dict, Mapping, Tuple
import requests
import urllib.request as urlrequest
from urllib.error import URLError
import datetime as dt
from collections import namedtuple
from contextlib import contextmanager
from math import ceil

import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry

from osmium import MergeInputReader, BaseHandler
from osmium import io as oio
from osmium import version
Expand Down Expand Up @@ -52,6 +55,8 @@ def __init__(self, url: str, diff_type: str = 'osc.gz') -> None:
self.diff_type = diff_type
self.extra_request_params: dict[str, Any] = dict(timeout=60, stream=True)
self.session: Optional[requests.Session] = None
self.retry = Retry(total=3, backoff_factor=0.5, allowed_methods={'GET'},
status_forcelist=[408, 429, 500, 502, 503, 504])

def close(self) -> None:
""" Close any open connection to the replication server.
Expand All @@ -62,6 +67,8 @@ def close(self) -> None:

def __enter__(self) -> 'ReplicationServer':
self.session = requests.Session()
self.session.mount('http://', HTTPAdapter(max_retries=self.retry))
self.session.mount('https://', HTTPAdapter(max_retries=self.retry))
return self

def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
Expand Down Expand Up @@ -97,6 +104,8 @@ def open_url(self, url: urlrequest.Request) -> Any:
@contextmanager
def _get_url_with_session() -> Iterator[requests.Response]:
with requests.Session() as session:
session.mount('http://', HTTPAdapter(max_retries=self.retry))
session.mount('https://', HTTPAdapter(max_retries=self.retry))
request = session.get(url.get_full_url(), **get_params)
yield request

Expand Down Expand Up @@ -133,7 +142,7 @@ def collect_diffs(self, start_id: int, max_size: int = 1024) -> Optional[Downloa
try:
diffdata = self.get_diff_block(current_id)
except:
LOG.debug("Error during diff download. Bailing out.")
LOG.error("Error during diff download. Bailing out.")
diffdata = ''
if len(diffdata) == 0:
if start_id == current_id:
Expand Down Expand Up @@ -348,6 +357,7 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona
with self.open_url(self.make_request(self.get_state_url(seq))) as response:
if hasattr(response, 'iter_lines'):
# generated by requests
response.raise_for_status()
lines = response.iter_lines()
else:
lines = response
Expand All @@ -372,7 +382,7 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona
ts = ts.replace(tzinfo=dt.timezone.utc)

except (URLError, IOError) as err:
LOG.debug("Loading state info %s failed with: %s", seq, str(err))
LOG.debug("Loading state info failed with: %s", str(err))
return None

if ts is not None and next_seq is not None:
Expand All @@ -382,12 +392,13 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona

def get_diff_block(self, seq: int) -> str:
""" Downloads the diff with the given sequence number and returns
it as a byte sequence. Throws a :code:`urllib.error.HTTPError`
it as a byte sequence. Throws a :code:`requests.HTTPError`
if the file cannot be downloaded.
"""
with self.open_url(self.make_request(self.get_diff_url(seq))) as resp:
if hasattr(resp, 'content'):
# generated by requests
resp.raise_for_status()
return cast(str, resp.content)

# generated by urllib.request
Expand Down
72 changes: 23 additions & 49 deletions test/test_pyosmium_get_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
#
# This file is part of Pyosmium.
#
# Copyright (C) 2022 Sarah Hoffmann.
# Copyright (C) 2023 Sarah Hoffmann.
""" Tests for the pyosmium-get-changes script.
"""
from io import BytesIO
from pathlib import Path
from textwrap import dedent

Expand All @@ -18,101 +17,76 @@
import cookielib as cookiejarlib


class RequestsResponses(BytesIO):

def __init__(self, bytes):
super(RequestsResponses, self).__init__(bytes)
self.content = bytes

def iter_lines(self):
return self.readlines()


class TestPyosmiumGetChanges:

@pytest.fixture(autouse=True)
def setUp(self, monkeypatch):
def setup(self):
self.script = dict()

filename = (Path(__file__) / ".." / ".." / "tools"/ "pyosmium-get-changes").resolve()
with filename.open("rb") as f:
exec(compile(f.read(), str(filename), 'exec'), self.script)

self.urls = dict()


@pytest.fixture
def mock_requests(self, monkeypatch):
def mock_get(session, url, **kwargs):
return RequestsResponses(self.urls[url])
monkeypatch.setattr(osmium.replication.server.requests.Session, "get", mock_get)


def url(self, url, result):
self.urls[url] = dedent(result).encode()

def main(self, *args):
return self.script['main'](args)
def main(self, httpserver, *args):
return self.script['main'](['--server', httpserver.url_for('')] + list(args))


def test_init_id(self, capsys):
assert 0 == self.main('-I', '453')
def test_init_id(self, capsys, httpserver):
assert 0 == self.main(httpserver, '-I', '453')

output = capsys.readouterr().out.strip()

assert output == '453'


def test_init_date(self, capsys, mock_requests):
self.url('https://planet.osm.org/replication/minute//state.txt',
"""\
def test_init_date(self, capsys, httpserver):
httpserver.expect_request('/state.txt').respond_with_data(dedent("""\
sequenceNumber=100
timestamp=2017-08-26T11\\:04\\:02Z
""")
self.url('https://planet.osm.org/replication/minute//000/000/000.state.txt',
"""\
"""))
httpserver.expect_request('/000/000/000.state.txt').respond_with_data(dedent("""\
sequenceNumber=0
timestamp=2016-08-26T11\\:04\\:02Z
""")
assert 0 == self.main('-D', '2015-12-24T08:08:08Z')
"""))
assert 0 == self.main(httpserver, '-D', '2015-12-24T08:08:08Z')

output = capsys.readouterr().out.strip()

assert output == '-1'


def test_init_to_file(self, tmp_path):
def test_init_to_file(self, tmp_path, httpserver):
fname = tmp_path / 'db.seq'

assert 0 == self.main('-I', '453', '-f', str(fname))
assert 0 == self.main(httpserver, '-I', '453', '-f', str(fname))
assert fname.read_text() == '453'


def test_init_from_seq_file(self, tmp_path):
def test_init_from_seq_file(self, tmp_path, httpserver):
fname = tmp_path / 'db.seq'
fname.write_text('453')

assert 0 == self.main('-f', str(fname))
assert 0 == self.main(httpserver, '-f', str(fname))
assert fname.read_text() == '453'


def test_init_date_with_cookie(self, capsys, tmp_path, mock_requests):
self.url('https://planet.osm.org/replication/minute//state.txt',
"""\
def test_init_date_with_cookie(self, capsys, tmp_path, httpserver):
httpserver.expect_request('/state.txt').respond_with_data(dedent("""\
sequenceNumber=100
timestamp=2017-08-26T11\\:04\\:02Z
""")
self.url('https://planet.osm.org/replication/minute//000/000/000.state.txt',
"""\
"""))
httpserver.expect_request('/000/000/000.state.txt').respond_with_data(dedent("""\
sequenceNumber=0
timestamp=2016-08-26T11\\:04\\:02Z
""")
"""))

fname = tmp_path / 'my.cookie'
cookie_jar = cookiejarlib.MozillaCookieJar(str(fname))
cookie_jar.save()

assert 0 == self.main('--cookie', str(fname), '-D', '2015-12-24T08:08:08Z')
assert 0 == self.main(httpserver, '--cookie', str(fname),
'-D', '2015-12-24T08:08:08Z')

output = capsys.readouterr().out.strip()

Expand Down
Loading

0 comments on commit f9ca6c7

Please sign in to comment.