Skip to content

Commit

Permalink
fix: omitting metadata during direct upload (#157)
Browse files: browse the repository at this point in the history
* fix: omitting metadata during direct upload

* tests: make error more verbose

* tests: ignore test files

* tests: drop old code
  • Loading branch information
wochinge authored Feb 2, 2024
1 parent 7ced654 commit e99191f
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,4 @@ cython_debug/
temp
.idea
.python-version
.DS_Store
3 changes: 1 addition & 2 deletions deepset_cloud_sdk/_api/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,7 @@ async def direct_upload_path(
response = await self._deepset_cloud_api.post(
workspace_name,
"files",
files={"file": (file_name, file)},
json={"meta": meta},
files={"file": (file_name, file), "meta": (None, json.dumps(meta))},
params={"write_mode": write_mode.value},
)
if response.status_code != codes.CREATED or response.json().get("file_id") is None:
Expand Down
40 changes: 40 additions & 0 deletions tests/integration/service/test_integration_files_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,26 @@ async def test_direct_upload_path(self, integration_config: CommonConfig, worksp
assert result.failed_upload_count == 0
assert len(result.failed) == 0

names_of_uploaded_files = [
file.name
for file in Path("./tests/test_data/msmarco.10").glob("*.txt")
if not file.name.endswith(".meta.json")
]
# Check the metadata was uploaded correctly
files: List[File] = []
async for file_batch in file_service.list_all(
workspace_name=workspace_name,
batch_size=11,
timeout_s=120,
):
files += file_batch

for file in files:
if file.name in names_of_uploaded_files:
assert (
file.meta.get("source") == "msmarco"
), f"Metadata was not uploaded correctly for file '{file.name}': {file.meta}"

async def test_async_upload(
self, integration_config: CommonConfig, workspace_name: str, monkeypatch: MonkeyPatch
) -> None:
Expand All @@ -49,6 +69,26 @@ async def test_async_upload(
assert result.failed_upload_count == 0
assert len(result.failed) == 0

names_of_uploaded_files = [
file.name
for file in Path("./tests/test_data/msmarco.10").glob("*.txt")
if not file.name.endswith(".meta.json")
]
# Check the metadata was uploaded correctly
files: List[File] = []
async for file_batch in file_service.list_all(
workspace_name=workspace_name,
batch_size=11,
timeout_s=120,
):
files += file_batch

for file in files:
if file.name in names_of_uploaded_files:
assert (
file.meta.get("source") == "msmarco"
), f"Metadata was not uploaded correctly for file '{file.name}': {file.meta}"

async def test_upload_texts(self, integration_config: CommonConfig, workspace_name: str) -> None:
async with FilesService.factory(integration_config) as file_service:
files = [
Expand Down
Binary file removed tests/test_data/msmarco.10/.DS_Store
Binary file not shown.
9 changes: 3 additions & 6 deletions tests/unit/api/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,7 @@ async def test_direct_upload_file(self, files_api: FilesAPI, mocked_deepset_clou
mocked_deepset_cloud_api.post.assert_called_once_with(
"test_workspace",
"files",
files={"file": ("basic.txt", ANY)},
json={"meta": {"key": "value"}},
files={"file": ("basic.txt", ANY), "meta": (None, '{"key": "value"}')},
params={
"write_mode": "OVERWRITE",
},
Expand All @@ -352,8 +351,7 @@ async def test_direct_upload_file_with_name(self, files_api: FilesAPI, mocked_de
mocked_deepset_cloud_api.post.assert_called_once_with(
"test_workspace",
"files",
files={"file": ("my_file.txt", ANY)},
json={"meta": {"key": "value"}},
files={"file": ("my_file.txt", ANY), "meta": (None, '{"key": "value"}')},
params={"write_mode": "OVERWRITE"},
)

Expand All @@ -373,8 +371,7 @@ async def test_direct_upload_with_path_as_string(self, files_api: FilesAPI, mock
mocked_deepset_cloud_api.post.assert_called_once_with(
"test_workspace",
"files",
files={"file": ("my_file.txt", ANY)},
json={"meta": {"key": "value"}},
files={"file": ("my_file.txt", ANY), "meta": (None, '{"key": "value"}')},
params={"write_mode": "FAIL"},
)

Expand Down

0 comments on commit e99191f

Please sign in to comment.