From 20fd67091a81ea51457c73ebd240a6fc13cf048d Mon Sep 17 00:00:00 2001 From: Raghavendra Dani Date: Wed, 23 Oct 2024 21:01:50 -0700 Subject: [PATCH 1/3] Add a case when in-memory-size is also 0 --- deltacat/compute/resource_estimation/delta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deltacat/compute/resource_estimation/delta.py b/deltacat/compute/resource_estimation/delta.py index 97f137a1..62500552 100644 --- a/deltacat/compute/resource_estimation/delta.py +++ b/deltacat/compute/resource_estimation/delta.py @@ -188,7 +188,7 @@ def _estimate_resources_required_to_process_delta_using_file_sampling( sampled_on_disk_size += delta.manifest.entries[entry_index].meta.content_length sampled_num_rows += len(tbl) - if not sampled_on_disk_size: + if not sampled_on_disk_size or not sampled_in_memory_size: return EstimatedResources.of( memory_bytes=0, statistics=Statistics.of( From 6c39042be61e3914043dad5ed60530216bc1587f Mon Sep 17 00:00:00 2001 From: Raghavendra Dani Date: Wed, 23 Oct 2024 21:49:26 -0700 Subject: [PATCH 2/3] Add UTs --- .../compute/resource_estimation/test_delta.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/deltacat/tests/compute/resource_estimation/test_delta.py b/deltacat/tests/compute/resource_estimation/test_delta.py index 8c6acc58..aeab34c6 100644 --- a/deltacat/tests/compute/resource_estimation/test_delta.py +++ b/deltacat/tests/compute/resource_estimation/test_delta.py @@ -437,6 +437,43 @@ def test_delta_manifest_parquet_when_file_sampling( == parquet_delta_with_manifest.meta.content_length ) + def test_parquet_delta_when_file_sampling_and_arrow_size_zero( + self, + local_deltacat_storage_kwargs, + parquet_delta_with_manifest: Delta, + monkeypatch, + ): + params = EstimateResourcesParams.of( + resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING, + max_files_to_sample=2, + ) + + def mock_func(*args, **kwargs): + class MockedValue: + nbytes = 0 + + def __len__(self): + return 0 + + return MockedValue() + + monkeypatch.setattr(ds, "download_delta_manifest_entry", mock_func) + + result = estimate_resources_required_to_process_delta( + delta=parquet_delta_with_manifest, + operation_type=OperationType.PYARROW_DOWNLOAD, + deltacat_storage=ds, + deltacat_storage_kwargs=local_deltacat_storage_kwargs, + estimate_resources_params=params, + ) + + assert parquet_delta_with_manifest.manifest is not None + assert result.memory_bytes == 0 + assert ( + result.statistics.on_disk_size_bytes + == parquet_delta_with_manifest.meta.content_length + ) + def test_delta_manifest_utsv_when_file_sampling( self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta ): From 4975c06d28726aedfb5ddd5d3fd0b635cb3040ef Mon Sep 17 00:00:00 2001 From: Raghavendra Dani Date: Wed, 23 Oct 2024 21:49:56 -0700 Subject: [PATCH 3/3] bump version --- deltacat/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deltacat/__init__.py b/deltacat/__init__.py index 1f19744e..be70e67d 100644 --- a/deltacat/__init__.py +++ b/deltacat/__init__.py @@ -44,7 +44,7 @@ deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__)) -__version__ = "1.1.26" +__version__ = "1.1.27" __all__ = [