From 635ba6b9465868a5932fb58fe8d4512e4e4da019 Mon Sep 17 00:00:00 2001 From: Kristof Herrmann Date: Tue, 30 Apr 2024 10:29:12 +0200 Subject: [PATCH] fix: lower file type comparison --- deepset_cloud_sdk/_service/files_service.py | 25 +++++++++---------- .../service/test_integration_files_service.py | 9 ++++--- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/deepset_cloud_sdk/_service/files_service.py b/deepset_cloud_sdk/_service/files_service.py index aaeb553b..9b22d145 100644 --- a/deepset_cloud_sdk/_service/files_service.py +++ b/deepset_cloud_sdk/_service/files_service.py @@ -1,4 +1,5 @@ """Module for all file-related operations.""" + from __future__ import annotations import asyncio @@ -35,6 +36,7 @@ logger = structlog.get_logger(__name__) SUPPORTED_TYPE_SUFFIXES = [".csv", ".docx", ".html", ".json", ".md", ".txt", ".pdf", ".pptx", ".xlsx", ".xml"] +META_SUFFIX = ".meta.json" DIRECT_UPLOAD_THRESHOLD = 20 @@ -197,11 +199,11 @@ async def upload_file_paths( _raw_files = [ path for path in file_paths - if path.suffix.lower() in SUPPORTED_TYPE_SUFFIXES and not path.name.endswith(".meta.json") + if path.suffix in SUPPORTED_TYPE_SUFFIXES and not path.name.endswith(META_SUFFIX) ] for file_path in _raw_files: meta: Dict[str, Any] = {} - meta_path = Path(str(file_path) + ".meta.json") + meta_path = Path(str(file_path) + META_SUFFIX) if meta_path in file_paths: with meta_path.open("r") as meta_file: meta = json.loads(meta_file.read()) @@ -240,7 +242,7 @@ async def upload_file_paths( # wait for ingestion to finish if blocking: - total_files = len(list(filter(lambda x: not os.path.basename(x).endswith(".meta.json"), file_paths))) + total_files = len(list(filter(lambda x: not os.path.basename(x).endswith(META_SUFFIX), file_paths))) await self._wait_for_finished( workspace_name=workspace_name, session_id=upload_session.session_id, @@ -282,7 +284,7 @@ def _validate_file_paths(file_paths: List[Path]) -> None: """ logger.info("Validating file paths and metadata.") for file_path in file_paths: - if file_path.suffix.lower() not in SUPPORTED_TYPE_SUFFIXES: + if file_path.suffix not in SUPPORTED_TYPE_SUFFIXES: raise ValueError( f"Invalid file extension: {file_path.suffix}. Refer to the list of supported file types in `SUPPORTED_TYPE_SUFFIXES`. " "Metadata files should have the `.meta.json` extension." @@ -290,16 +292,16 @@ def _validate_file_paths(file_paths: List[Path]) -> None: meta_file_names = list( map( lambda fp: os.path.basename(fp), - [file_path for file_path in file_paths if str(file_path).lower().endswith(".meta.json")], + [file_path for file_path in file_paths if str(file_path).endswith(META_SUFFIX)], ) ) file_names = list(map(lambda fp: os.path.basename(fp), file_paths)) - file_name_set = set(filter(lambda fn: not fn.lower().endswith(".meta.json"), file_names)) + file_name_set = set(filter(lambda fn: not fn.endswith(META_SUFFIX), file_names)) not_mapped_meta_files = [ meta_file_name for meta_file_name in meta_file_names - if meta_file_name.lower().split(".meta.json")[0] not in file_name_set + if meta_file_name.split(META_SUFFIX)[0] not in file_name_set ] if len(not_mapped_meta_files) > 0: @@ -341,7 +343,7 @@ def _get_allowed_file_types(desired_file_types: Optional[List[Any]]) -> List[str return SUPPORTED_TYPE_SUFFIXES desired_types_processed: Set[str] = { - str(file_type).lower() if str(file_type).startswith(".") else f".{str(file_type).lower()}" + str(file_type) if str(file_type).startswith(".") else f".{str(file_type)}" for file_type in desired_file_types } allowed_types: Set[str] = { @@ -362,14 +364,11 @@ def _preprocess_paths( allowed_file_types: List[str] = FilesService._get_allowed_file_types(desired_file_types) allowed_meta_types: Tuple = tuple(f"{file_type}.meta.json" for file_type in allowed_file_types) - meta_file_path = [ - path for path in all_files if path.is_file() and str(path).lower().endswith(allowed_meta_types) - ] + meta_file_path = [path for path in all_files if path.is_file() and str(path).endswith(allowed_meta_types)] file_paths = [ path for path in all_files - if path.is_file() - and (path.suffix.lower() in allowed_file_types and not str(path).lower().endswith(".meta.json")) + if path.is_file() and (path.suffix in allowed_file_types and not str(path).endswith(META_SUFFIX)) ] combined_paths = meta_file_path + file_paths diff --git a/tests/integration/service/test_integration_files_service.py b/tests/integration/service/test_integration_files_service.py index ebc89ed0..a6a83f3f 100644 --- a/tests/integration/service/test_integration_files_service.py +++ b/tests/integration/service/test_integration_files_service.py @@ -10,6 +10,7 @@ from deepset_cloud_sdk._api.files import File from deepset_cloud_sdk._api.upload_sessions import WriteMode from deepset_cloud_sdk._service.files_service import ( + META_SUFFIX, SUPPORTED_TYPE_SUFFIXES, DeepsetCloudFile, FilesService, @@ -37,7 +38,7 @@ async def test_direct_upload_path(self, integration_config: CommonConfig, worksp names_of_uploaded_files = [ file.name for file in Path("./tests/test_data/msmarco.10").glob("*.txt") - if not file.name.endswith(".meta.json") + if not file.name.endswith(META_SUFFIX) ] # Check the metadata was uploaded correctly files: List[File] = [] @@ -76,7 +77,7 @@ async def test_direct_upload_path_multiple_file_types( local_file_names: List[str] = [ file.name for file in Path("./tests/test_data/multiple_file_types").glob("*") - if not file.name.endswith(".meta.json") + if not file.name.endswith(META_SUFFIX) ] uploaded_files: List[File] = [] @@ -119,7 +120,7 @@ async def test_async_upload( local_file_names: List[str] = [ file.name for file in Path("./tests/test_data/msmarco.10").glob("*.txt") - if not file.name.endswith(".meta.json") + if not file.name.endswith(META_SUFFIX) ] # Check the metadata was uploaded correctly uploaded_files: List[File] = [] @@ -164,7 +165,7 @@ async def test_async_upload_multiple_file_types( local_file_names: List[str] = [ file.name for file in Path("./tests/test_data/multiple_file_types").glob("*") - if not file.name.endswith(".meta.json") + if not file.name.endswith(META_SUFFIX) ] uploaded_files: List[File] = []