diff --git a/src/dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py b/src/dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py index 77f164feff..199cec7a2a 100644 --- a/src/dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +++ b/src/dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Iterator +import regex from loguru import logger from requests import JSONDecodeError from requests import RequestException @@ -58,16 +59,17 @@ def upload_file( filepath: Path, ) -> UploadFailure | None: """Uploads a file to the ingest server.""" - url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/ingest/{urllib.parse.quote(str(filepath))}" - headers = {"Content-Type": "application/octet-stream"} - timeout = 600 - err_msg = f"Failed to upload '{filepath}' to '{url}'." try: with open(self.imgdir / filepath, "rb") as binary_io: content = binary_io.read() except OSError as e: + err_msg = f"Cannot bulk-ingest {filepath}, because the file could not be opened/read: {e.strerror}" logger.error(err_msg) - return UploadFailure(filepath, f"File could not be opened/read: {e.strerror}") + return UploadFailure(filepath, err_msg) + url = self._build_url_for_bulk_ingest_ingest_route(filepath) + headers = {"Content-Type": "application/octet-stream"} + timeout = 600 + err_msg = f"Failed to upload '{filepath}' to '{url}'." try: logger.debug(f"REQUEST: POST to {url}, timeout: {timeout}, headers: {headers}") res = self.session.post( @@ -84,9 +86,23 @@ def upload_file( logger.error(err_msg) reason = f"Response {res.status_code}: {res.text}" if res.text else f"Response {res.status_code}" return UploadFailure(filepath, reason) - return None + def _build_url_for_bulk_ingest_ingest_route(self, filepath: Path) -> str: + """ + Remove the leading slash of absolute filepaths, + because the /projects/{shortcode}/bulk-ingest/ingest route only accepts relative paths. 
+ The leading slash has to be added again in the "ingest-xmlupload" step, when applying the ingest ID. + + Args: + filepath: filepath + + Returns: + url + """ + quoted = regex.sub(r"^\/", "", urllib.parse.quote(str(filepath))) + return f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/ingest/{quoted}" + def trigger_ingest_process(self) -> None: """Start the ingest process on the server.""" url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest" diff --git a/test/unittests/commands/ingest_xmlupload/test_bulk_ingest_client.py b/test/unittests/commands/ingest_xmlupload/test_bulk_ingest_client.py index 6b856873d3..3b3621c584 100644 --- a/test/unittests/commands/ingest_xmlupload/test_bulk_ingest_client.py +++ b/test/unittests/commands/ingest_xmlupload/test_bulk_ingest_client.py @@ -28,6 +28,7 @@ def tmp_file(tmp_path: Path) -> Path: def _make_url(file: Path) -> str: filename = urllib.parse.quote(str(file)) + filename = filename[1:] if filename.startswith("/") else filename return f"{DSP_INGEST_URL}/projects/{SHORTCODE}/bulk-ingest/ingest/{filename}" @@ -51,7 +52,7 @@ def test_upload_file_with_inexisting_file(ingest_client: BulkIngestClient) -> No failure_detail = ingest_client.upload_file(Path("inexisting.xml")) assert failure_detail assert failure_detail.filepath == Path("inexisting.xml") - assert failure_detail.reason == "File could not be opened/read: No such file or directory" + assert re.search(r"the file could not be opened/read", failure_detail.reason) def test_upload_file_failure_upon_request_exception( @@ -87,6 +88,21 @@ def test_upload_file_failure_upon_server_error_with_response_text( assert failure_detail.reason == "Response 500: response text" +@pytest.mark.parametrize( + ("filepath", "url_suffix"), + [ + (Path("Côté gauche/Süd.png"), "C%C3%B4t%C3%A9%20gauche/S%C3%BCd.png"), + (Path("/absolute/path/to/file.txt"), "absolute/path/to/file.txt"), + ], +) +def test_build_url_for_bulk_ingest_ingest_route( + ingest_client: 
BulkIngestClient, filepath: Path, url_suffix: str +) -> None: + res = ingest_client._build_url_for_bulk_ingest_ingest_route(filepath) + common_part = f"{DSP_INGEST_URL}/projects/{SHORTCODE}/bulk-ingest/ingest/" + assert res == f"{common_part}{url_suffix}" + + def test_trigger_if_success(ingest_client: BulkIngestClient, requests_mock: Mocker) -> None: url = f"{DSP_INGEST_URL}/projects/{SHORTCODE}/bulk-ingest" requests_mock.post(url, status_code=202, text=json.dumps({"id": SHORTCODE}))