Skip to content

Commit

Permalink
fix(ingest-xmlupload): strip leading / of absolute paths before sen…
Browse files Browse the repository at this point in the history
…ding to ingest (DEV-4300) (#1252)

Co-authored-by: Nora-Olivia-Ammann <[email protected]>
  • Loading branch information
jnussbaum and Nora-Olivia-Ammann authored Nov 1, 2024
1 parent 435e283 commit 695b189
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 7 deletions.
28 changes: 22 additions & 6 deletions src/dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
from typing import Iterator

import regex
from loguru import logger
from requests import JSONDecodeError
from requests import RequestException
Expand Down Expand Up @@ -58,16 +59,17 @@ def upload_file(
filepath: Path,
) -> UploadFailure | None:
"""Uploads a file to the ingest server."""
url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/ingest/{urllib.parse.quote(str(filepath))}"
headers = {"Content-Type": "application/octet-stream"}
timeout = 600
err_msg = f"Failed to upload '{filepath}' to '{url}'."
try:
with open(self.imgdir / filepath, "rb") as binary_io:
content = binary_io.read()
except OSError as e:
err_msg = f"Cannot bulk-ingest {filepath}, because the file could not be opened/read: {e.strerror}"
logger.error(err_msg)
return UploadFailure(filepath, f"File could not be opened/read: {e.strerror}")
return UploadFailure(filepath, err_msg)
url = self._build_url_for_bulk_ingest_ingest_route(filepath)
headers = {"Content-Type": "application/octet-stream"}
timeout = 600
err_msg = f"Failed to upload '{filepath}' to '{url}'."
try:
logger.debug(f"REQUEST: POST to {url}, timeout: {timeout}, headers: {headers}")
res = self.session.post(
Expand All @@ -84,9 +86,23 @@ def upload_file(
logger.error(err_msg)
reason = f"Response {res.status_code}: {res.text}" if res.text else f"Response {res.status_code}"
return UploadFailure(filepath, reason)

return None

def _build_url_for_bulk_ingest_ingest_route(self, filepath: Path) -> str:
    """
    Build the URL of the bulk-ingest "ingest" route for a single file.

    The filepath is percent-encoded, and the leading slash of absolute filepaths is removed,
    because the /projects/<shortcode>/bulk-ingest/ingest route only accepts relative paths.
    The leading slash has to be added again in the "ingest-xmlupload" step, when applying the ingest ID.

    Args:
        filepath: path of the file to upload (relative or absolute)

    Returns:
        the URL of the ingest route, ending with the quoted filepath without its leading slash
    """
    # quote() leaves "/" unescaped by default, so an absolute path still starts with "/" after quoting.
    # A plain prefix strip is enough here; no regular expression is needed.
    quoted = urllib.parse.quote(str(filepath)).removeprefix("/")
    return f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/ingest/{quoted}"

def trigger_ingest_process(self) -> None:
"""Start the ingest process on the server."""
url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def tmp_file(tmp_path: Path) -> Path:

def _make_url(file: Path) -> str:
    """Build the bulk-ingest "ingest" URL for *file*.

    The path is percent-encoded and one leading slash, if present, is dropped.
    """
    encoded_path = urllib.parse.quote(str(file)).removeprefix("/")
    return f"{DSP_INGEST_URL}/projects/{SHORTCODE}/bulk-ingest/ingest/{encoded_path}"


Expand All @@ -51,7 +52,7 @@ def test_upload_file_with_inexisting_file(ingest_client: BulkIngestClient) -> No
failure_detail = ingest_client.upload_file(Path("inexisting.xml"))
assert failure_detail
assert failure_detail.filepath == Path("inexisting.xml")
assert failure_detail.reason == "File could not be opened/read: No such file or directory"
assert re.search(r"the file could not be opened/read", failure_detail.reason)


def test_upload_file_failure_upon_request_exception(
Expand Down Expand Up @@ -87,6 +88,21 @@ def test_upload_file_failure_upon_server_error_with_response_text(
assert failure_detail.reason == "Response 500: response text"


@pytest.mark.parametrize(
    ("filepath", "url_suffix"),
    [
        (Path("Côté gauche/Süd.png"), "C%C3%B4t%C3%A9%20gauche/S%C3%BCd.png"),
        (Path("/absolute/path/to/file.txt"), "absolute/path/to/file.txt"),
    ],
)
def test_build_url_for_bulk_ingest_ingest_route(
    ingest_client: BulkIngestClient, filepath: Path, url_suffix: str
) -> None:
    """Check that the ingest URL ends with the quoted filepath, without a leading slash."""
    expected_url = f"{DSP_INGEST_URL}/projects/{SHORTCODE}/bulk-ingest/ingest/{url_suffix}"
    actual_url = ingest_client._build_url_for_bulk_ingest_ingest_route(filepath)
    assert actual_url == expected_url


def test_trigger_if_success(ingest_client: BulkIngestClient, requests_mock: Mocker) -> None:
url = f"{DSP_INGEST_URL}/projects/{SHORTCODE}/bulk-ingest"
requests_mock.post(url, status_code=202, text=json.dumps({"id": SHORTCODE}))
Expand Down

0 comments on commit 695b189

Please sign in to comment.