Skip to content
This repository has been archived by the owner on Aug 4, 2023. It is now read-only.

Commit

Permalink
Freesound SSLError fix (#330)
Browse files Browse the repository at this point in the history
* Use retry library to catch and retry SSLErrors

* Add test for retry condition

* Pin retry version
  • Loading branch information
AetherUnbound authored Jan 25, 2022
1 parent 8de8d70 commit cadc9d3
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
22 changes: 18 additions & 4 deletions openverse_catalog/dags/providers/provider_api_scripts/freesound.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from common.loader import provider_details as prov
from common.requester import DelayedRequester
from common.storage.audio import AudioStore
from requests.exceptions import SSLError
from retry import retry


LIMIT = 150
Expand Down Expand Up @@ -165,7 +167,11 @@ def _extract_audio_data(media_data):
# for playing on the frontend,
# and the actual uploaded file as an alt_file that is available
# for download (and requires a user to be authenticated to download)
main_audio, alt_files = _get_audio_files(media_data)
try:
main_audio, alt_files = _get_audio_files(media_data)
except SSLError:
logger.warning(f"Unable to get file size for {foreign_landing_url}, skipping")
return None
if main_audio is None:
return None

Expand Down Expand Up @@ -222,6 +228,16 @@ def _get_preview_filedata(preview_type, preview_url):
}


@retry(SSLError, tries=3, delay=1, backoff=2)
def _get_audio_file_size(url):
"""
Get the content length of a provided URL.
Freesound can be a bit finicky, so we want to retry it a few times
"""
# TODO(obulat): move filesize detection to the polite crawler
return int(requests.head(url).headers["content-length"])


def _get_audio_files(media_data):
# This is the original file, needs auth for downloading.
# bit_rate in kilobytes, converted to bytes
Expand All @@ -240,9 +256,7 @@ def _get_audio_files(media_data):
return None
main_file = _get_preview_filedata("preview-hq-mp3", previews["preview-hq-mp3"])
main_file["audio_url"] = main_file.pop("url")
# TODO(obulat): move filesize detection to the polite crawler
filesize = requests.head(main_file["audio_url"]).headers["content-length"]
main_file["filesize"] = int(filesize)
main_file["filesize"] = _get_audio_file_size(main_file["audio_url"])
return main_file, alt_files


Expand Down
1 change: 1 addition & 0 deletions requirements_prod.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ lxml
psycopg2-binary
requests-file==1.5.1
requests-oauthlib
retry==0.9.2
tldextract==3.1.0
11 changes: 11 additions & 0 deletions tests/dags/providers/provider_api_scripts/test_freesound.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
from common.licenses.licenses import LicenseInfo
from providers.provider_api_scripts import freesound
from requests.exceptions import SSLError


RESOURCES = Path(__file__).parent.resolve() / "resources/freesound"
Expand Down Expand Up @@ -43,6 +44,16 @@ def test_get_audio_pages_returns_correctly_with_no_results():
assert actual_result == expect_result


def test_get_audio_file_size_retries_and_does_not_raise(audio_data):
expected_result = None
with patch("requests.head") as head_patch:
head_patch.side_effect = SSLError("whoops")
actual_result = freesound._extract_audio_data(audio_data)

assert head_patch.call_count == 3
assert actual_result == expected_result


def test_get_query_params_adds_page_number():
actual_qp = freesound._get_query_params(page_number=2)
assert actual_qp["page"] == str(2)
Expand Down

0 comments on commit cadc9d3

Please sign in to comment.